compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31
  32 -- Size of a PPC memory address, in bytes.
  33 --
  34 archWordSize :: Size
  35 #if i386_TARGET_ARCH
  36 archWordSize    = II32
  37 #elif x86_64_TARGET_ARCH
  38 archWordSize    = II64
  39 #else
  40 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  41 #endif
  42
  43 -- | Instruction instance for x86 instruction set.
  44 instance Instruction Instr where
  45         regUsageOfInstr         = x86_regUsageOfInstr
  46         patchRegsOfInstr        = x86_patchRegsOfInstr
  47         isJumpishInstr          = x86_isJumpishInstr
  48         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  49         patchJumpInstr          = x86_patchJumpInstr
  50         mkSpillInstr            = x86_mkSpillInstr
  51         mkLoadInstr             = x86_mkLoadInstr
  52         takeDeltaInstr          = x86_takeDeltaInstr
  53         isMetaInstr             = x86_isMetaInstr
  54         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  55         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  56         mkJumpInstr             = x86_mkJumpInstr
  57
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Intel x86 instructions
  61
  62 {-
  63 Intel, in their infinite wisdom, selected a stack model for floating
  64 point registers on x86.  That might have made sense back in 1979 --
  65 nowadays we can see it for the nonsense it really is.  A stack model
  66 fits poorly with the existing nativeGen infrastructure, which assumes
  67 flat integer and FP register sets.  Prior to this commit, nativeGen
  68 could not generate correct x86 FP code -- to do so would have meant
  69 somehow working the register-stack paradigm into the register
  70 allocator and spiller, which sounds very difficult.
  71
  72 We have decided to cheat, and go for a simple fix which requires no
  73 infrastructure modifications, at the expense of generating ropey but
  74 correct FP code.  All notions of the x86 FP stack and its insns have
  75 been removed.  Instead, we pretend (to the instruction selector and
  76 register allocator) that x86 has six floating point registers, %fake0
  77 .. %fake5, which can be used in the usual flat manner.  We further
  78 claim that x86 has floating point instructions very similar to SPARC
  79 and Alpha, that is, a simple 3-operand register-register arrangement.
  80 Code generation and register allocation proceed on this basis.
  81
  82 When we come to print out the final assembly, our convenient fiction
  83 is converted to dismal reality.  Each fake instruction is
  84 independently converted to a series of real x86 instructions.
  85 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  86 arithmetic operations, the two operands are pushed onto the top of the
  87 FP stack, the operation done, and the result copied back into the
  88 relevant register.  There are only six %fake registers because 2 are
  89 needed for the translation, and x86 has 8 in total.
  90
  91 The translation is inefficient but is simple and it works.  A cleverer
  92 translation would handle a sequence of insns, simulating the FP stack
  93 contents, would not impose a fixed mapping from %fake to %st regs, and
  94 hopefully could avoid most of the redundant reg-reg moves of the
  95 current translation.
  96
  97 We might as well make use of whatever unique FP facilities Intel have
  98 chosen to bless us with (let's not be churlish, after all).
  99 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 100 -}
 101
 102 {-
 103 MORE FLOATING POINT MUSINGS...
 104
 105 Intel's internal floating point registers are by default 80 bit
 106 extended precision.  This means that all operations done on values in
 107 registers are done at 80 bits, and unless the intermediate values are
 108 truncated to the appropriate size (32 or 64 bits) by storing in
 109 memory, calculations in registers will give different results from
 110 calculations which pass intermediate values in memory (eg. via
 111 function calls).
 112
 113 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 114 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 115 that this will only affect 64-bit precision arithmetic; 32-bit
 116 calculations will still be done at 64-bit precision in registers.  So
 117 it doesn't solve the whole problem.
 118
 119 There's also the issue of what the C library is expecting in terms of
 120 precision.  It seems to be the case that glibc on Linux expects the
 121 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 122 unexpected effects.  Changing the default could have undesirable
 123 effects on other 3rd-party library code too, so the right thing would
 124 be to save/restore the FPU control word across Haskell code if we were
 125 to do this.
 126
 127 gcc's -ffloat-store gives consistent results by always storing the
 128 results of floating-point calculations in memory, which works for both
 129 32 and 64-bit precision.  However, it only affects the values of
 130 user-declared floating point variables in C, not intermediate results.
 131 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 132 flag).
 133
 134 Another problem is how to spill floating point registers in the
 135 register allocator.  Should we spill the whole 80 bits, or just 64?
 136 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 137 Linux, spilling 64 bits will round the results of some operations.
 138 This is what gcc does.  Spilling at 80 bits requires taking up a full
 139 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 140 the alignment problems.
 141
 142 In the future, we'll use the SSE registers for floating point.  This
 143 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 144 precision float ops), which means P4 or Xeon and above.  Using SSE
 145 will solve all these problems, because the SSE registers use fixed 32
 146 bit or 64 bit precision.
 147
 148 --SDM 1/2003
 149 -}
 150
 151
 152 data Instr
 153         -- comment pseudo-op
 154         = COMMENT FastString
 155
 156         -- some static data spat out during code
 157         -- generation.  Will be extracted before
 158         -- pretty-printing.
 159         | LDATA   Section [CmmStatic]
 160
 161         -- start a new basic block.  Useful during
 162         -- codegen, removed later.  Preceding
 163         -- instruction should be a jump, as per the
 164         -- invariants for a BasicBlock (see Cmm).
 165         | NEWBLOCK BlockId
 166
 167         -- specify current stack offset for
 168         -- benefit of subsequent passes
 169         | DELTA   Int
 170
 171         -- Moves.
 172         | MOV         Size Operand Operand
 173         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 174         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 175         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 176         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 177         -- don't affect the high bits of the register.
 178
 179         -- Load effective address (also a very useful three-operand add instruction :-)
 180         | LEA         Size Operand Operand
 181
 182         -- Int Arithmetic.
 183         | ADD         Size Operand Operand
 184         | ADC         Size Operand Operand
 185         | SUB         Size Operand Operand
 186
 187         | MUL         Size Operand Operand
 188         | IMUL        Size Operand Operand      -- signed int mul
 189         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 190
 191         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 192         | IDIV        Size Operand              -- ditto, but signed
 193
 194         -- Simple bit-twiddling.
 195         | AND         Size Operand Operand
 196         | OR          Size Operand Operand
 197         | XOR         Size Operand Operand
 198         | NOT         Size Operand
 199         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 200
 201         -- Shifts (amount may be immediate or %cl only)
 202         | SHL         Size Operand{-amount-} Operand
 203         | SAR         Size Operand{-amount-} Operand
 204         | SHR         Size Operand{-amount-} Operand
 205
 206         | BT          Size Imm Operand
 207         | NOP
 208
 209         -- x86 Float Arithmetic.
 210         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 211         -- as single instructions right up until we spit them out.
 212         -- all the 3-operand fake fp insns are src1 src2 dst
 213         -- and furthermore are constrained to be fp regs only.
 214         -- IMPORTANT: keep is_G_insn up to date with any changes here
 215         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 216         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 217         | GST         Size Reg AddrMode -- src(fpreg), dst
 218
 219         | GLDZ        Reg -- dst(fpreg)
 220         | GLD1        Reg -- dst(fpreg)
 221
 222         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 223         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 224
 225         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 226         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 227
 228         | GADD        Size Reg Reg Reg -- src1, src2, dst
 229         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 230         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 231         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 232
 233                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 234                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 235                 -- comparable and the comparison is True.  Subsequent code must
 236                 -- test the %eflags zero flag regardless of the supplied Cond.
 237         | GCMP        Cond Reg Reg -- src1, src2
 238
 239         | GABS        Size Reg Reg -- src, dst
 240         | GNEG        Size Reg Reg -- src, dst
 241         | GSQRT       Size Reg Reg -- src, dst
 242         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 243         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 244         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 245
 246         | GFREE         -- do ffree on all x86 regs; an ugly hack
 247
 248
 249         -- SSE2 floating point: we use a restricted set of the available SSE2
 250         -- instructions for floating-point.
 251         -- use MOV for moving (either movss or movsd (movlpd better?))
 252         | CVTSS2SD      Reg Reg         -- F32 to F64
 253         | CVTSD2SS      Reg Reg         -- F64 to F32
 254         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 255         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 256         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 257         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 258
 259         -- use ADD & SUB for arithmetic.  In both cases, operands
 260         -- are  Operand Reg.
 261
 262         -- SSE2 floating-point division:
 263         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 264
 265         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 266         -- compare single/double prec floating point respectively.
 267
 268         | SQRT          Size Operand Reg        -- src, dst
 269
 270
 271         -- Comparison
 272         | TEST          Size Operand Operand
 273         | CMP           Size Operand Operand
 274         | SETCC         Cond Operand
 275
 276         -- Stack Operations.
 277         | PUSH          Size Operand
 278         | POP           Size Operand
 279         -- both unused (SDM):
 280         --  | PUSHA
 281         --  | POPA
 282
 283         -- Jumping around.
 284         | JMP         Operand
 285         | JXX         Cond BlockId  -- includes unconditional branches
 286         | JXX_GBL     Cond Imm      -- non-local version of JXX
 287         | JMP_TBL     Operand [BlockId]  -- table jump
 288         | CALL        (Either Imm Reg) [Reg]
 289
 290         -- Other things.
 291         | CLTD Size              -- sign extend %eax into %edx:%eax
 292
 293         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 294                                  -- pretty-prints as
 295                                  --       call 1f
 296                                  -- 1:    popl %reg
 297                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 298         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 299                                  -- pretty-prints as
 300                                  --       call 1f
 301                                  -- 1:    popl %reg
 302
 303
 304 data Operand
 305         = OpReg  Reg            -- register
 306         | OpImm  Imm            -- immediate value
 307         | OpAddr AddrMode       -- memory reference
 308
 309
 310
 311 x86_regUsageOfInstr :: Instr -> RegUsage
 312 x86_regUsageOfInstr instr
 313  = case instr of
 314     MOV    _ src dst    -> usageRW src dst
 315     MOVZxL _ src dst    -> usageRW src dst
 316     MOVSxL _ src dst    -> usageRW src dst
 317     LEA    _ src dst    -> usageRW src dst
 318     ADD    _ src dst    -> usageRM src dst
 319     ADC    _ src dst    -> usageRM src dst
 320     SUB    _ src dst    -> usageRM src dst
 321     IMUL   _ src dst    -> usageRM src dst
 322     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 323     MUL    _ src dst    -> usageRM src dst
 324     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 325     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 326     AND    _ src dst    -> usageRM src dst
 327     OR     _ src dst    -> usageRM src dst
 328
 329     XOR    _ (OpReg src) (OpReg dst)
 330         | src == dst    -> mkRU [] [dst]
 331
 332     XOR    _ src dst    -> usageRM src dst
 333     NOT    _ op         -> usageM op
 334     NEGI   _ op         -> usageM op
 335     SHL    _ imm dst    -> usageRM imm dst
 336     SAR    _ imm dst    -> usageRM imm dst
 337     SHR    _ imm dst    -> usageRM imm dst
 338     BT     _ _   src    -> mkRUR (use_R src)
 339
 340     PUSH   _ op         -> mkRUR (use_R op)
 341     POP    _ op         -> mkRU [] (def_W op)
 342     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 343     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 344     SETCC  _ op         -> mkRU [] (def_W op)
 345     JXX    _ _          -> mkRU [] []
 346     JXX_GBL _ _         -> mkRU [] []
 347     JMP     op          -> mkRUR (use_R op)
 348     JMP_TBL op _        -> mkRUR (use_R op)
 349     CALL (Left _)  params   -> mkRU params callClobberedRegs
 350     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 351     CLTD   _            -> mkRU [eax] [edx]
 352     NOP                 -> mkRU [] []
 353
 354 #if i386_TARGET_ARCH
 355     GMOV   src dst      -> mkRU [src] [dst]
 356     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 357     GST    _ src dst    -> mkRUR (src : use_EA dst)
 358
 359     GLDZ   dst          -> mkRU [] [dst]
 360     GLD1   dst          -> mkRU [] [dst]
 361
 362     GFTOI  src dst      -> mkRU [src] [dst]
 363     GDTOI  src dst      -> mkRU [src] [dst]
 364
 365     GITOF  src dst      -> mkRU [src] [dst]
 366     GITOD  src dst      -> mkRU [src] [dst]
 367
 368     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 369     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 370     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372
 373     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 374     GABS   _ src dst     -> mkRU [src] [dst]
 375     GNEG   _ src dst     -> mkRU [src] [dst]
 376     GSQRT  _ src dst     -> mkRU [src] [dst]
 377     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 378     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 379     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 380 #endif
 381
 382 #if x86_64_TARGET_ARCH
 383     CVTSS2SD   src dst  -> mkRU [src] [dst]
 384     CVTSD2SS   src dst  -> mkRU [src] [dst]
 385     CVTTSS2SIQ src dst  -> mkRU (use_R src) [dst]
 386     CVTTSD2SIQ src dst  -> mkRU (use_R src) [dst]
 387     CVTSI2SS   src dst  -> mkRU (use_R src) [dst]
 388     CVTSI2SD   src dst  -> mkRU (use_R src) [dst]
 389     FDIV _     src dst  -> usageRM src dst
 390 #endif
 391
 392     FETCHGOT reg        -> mkRU [] [reg]
 393     FETCHPC  reg        -> mkRU [] [reg]
 394
 395     COMMENT _           -> noUsage
 396     DELTA   _           -> noUsage
 397
 398     _other              -> panic "regUsage: unrecognised instr"
 399
 400  where
 401     -- 2 operand form; first operand Read; second Written
 402     usageRW :: Operand -> Operand -> RegUsage
 403     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 404     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 405     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 406
 407     -- 2 operand form; first operand Read; second Modified
 408     usageRM :: Operand -> Operand -> RegUsage
 409     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 410     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 411     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 412
 413     -- 1 operand form; operand Modified
 414     usageM :: Operand -> RegUsage
 415     usageM (OpReg reg)          = mkRU [reg] [reg]
 416     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 417     usageM _                    = panic "X86.RegInfo.usageM: no match"
 418
 419     -- Registers defd when an operand is written.
 420     def_W (OpReg reg)           = [reg]
 421     def_W (OpAddr _ )           = []
 422     def_W _                     = panic "X86.RegInfo.def_W: no match"
 423
 424     -- Registers used when an operand is read.
 425     use_R (OpReg reg)  = [reg]
 426     use_R (OpImm _)    = []
 427     use_R (OpAddr ea)  = use_EA ea
 428
 429     -- Registers used to compute an effective address.
 430     use_EA (ImmAddr _ _) = []
 431     use_EA (AddrBaseIndex base index _) =
 432         use_base base $! use_index index
 433         where use_base (EABaseReg r) x = r : x
 434               use_base _ x             = x
 435               use_index EAIndexNone   = []
 436               use_index (EAIndex i _) = [i]
 437
 438     mkRUR src = src' `seq` RU src' []
 439         where src' = filter interesting src
 440
 441     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 442         where src' = filter interesting src
 443               dst' = filter interesting dst
 444
 445 interesting :: Reg -> Bool
 446 interesting (RegVirtual _)              = True
 447 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 448 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 449
 450
 451
 452 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 453 x86_patchRegsOfInstr instr env
 454  = case instr of
 455     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 456     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 457     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 458     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 459     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 460     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 461     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 462     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 463     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 464     MUL sz src dst      -> patch2 (MUL sz) src dst
 465     IDIV sz op          -> patch1 (IDIV sz) op
 466     DIV sz op           -> patch1 (DIV sz) op
 467     AND  sz src dst     -> patch2 (AND  sz) src dst
 468     OR   sz src dst     -> patch2 (OR   sz) src dst
 469     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 470     NOT  sz op          -> patch1 (NOT  sz) op
 471     NEGI sz op          -> patch1 (NEGI sz) op
 472     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 473     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 474     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 475     BT   sz imm src     -> patch1 (BT  sz imm) src
 476     TEST sz src dst     -> patch2 (TEST sz) src dst
 477     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 478     PUSH sz op          -> patch1 (PUSH sz) op
 479     POP  sz op          -> patch1 (POP  sz) op
 480     SETCC cond op       -> patch1 (SETCC cond) op
 481     JMP op              -> patch1 JMP op
 482     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 483
 484 #if i386_TARGET_ARCH
 485     GMOV src dst        -> GMOV (env src) (env dst)
 486     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 487     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 488
 489     GLDZ dst            -> GLDZ (env dst)
 490     GLD1 dst            -> GLD1 (env dst)
 491
 492     GFTOI src dst       -> GFTOI (env src) (env dst)
 493     GDTOI src dst       -> GDTOI (env src) (env dst)
 494
 495     GITOF src dst       -> GITOF (env src) (env dst)
 496     GITOD src dst       -> GITOD (env src) (env dst)
 497
 498     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 499     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 500     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 501     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 502
 503     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 504     GABS sz src dst     -> GABS sz (env src) (env dst)
 505     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 506     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 507     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 508     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 509     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 510 #endif
 511
 512 #if x86_64_TARGET_ARCH
 513     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 514     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 515     CVTTSS2SIQ src dst  -> CVTTSS2SIQ (patchOp src) (env dst)
 516     CVTTSD2SIQ src dst  -> CVTTSD2SIQ (patchOp src) (env dst)
 517     CVTSI2SS src dst    -> CVTSI2SS (patchOp src) (env dst)
 518     CVTSI2SD src dst    -> CVTSI2SD (patchOp src) (env dst)
 519     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 520 #endif
 521
 522     CALL (Left _)  _    -> instr
 523     CALL (Right reg) p  -> CALL (Right (env reg)) p
 524
 525     FETCHGOT reg        -> FETCHGOT (env reg)
 526     FETCHPC  reg        -> FETCHPC  (env reg)
 527
 528     NOP                 -> instr
 529     COMMENT _           -> instr
 530     DELTA _             -> instr
 531
 532     JXX _ _             -> instr
 533     JXX_GBL _ _         -> instr
 534     CLTD _              -> instr
 535
 536     _other              -> panic "patchRegs: unrecognised instr"
 537
 538   where
 539     patch1 insn op      = insn $! patchOp op
 540     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 541
 542     patchOp (OpReg  reg) = OpReg $! env reg
 543     patchOp (OpImm  imm) = OpImm imm
 544     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 545
 546     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 547     lookupAddr (AddrBaseIndex base index disp)
 548       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 549       where
 550         lookupBase EABaseNone       = EABaseNone
 551         lookupBase EABaseRip        = EABaseRip
 552         lookupBase (EABaseReg r)    = EABaseReg (env r)
 553
 554         lookupIndex EAIndexNone     = EAIndexNone
 555         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 556
 557
 558 --------------------------------------------------------------------------------
 559 x86_isJumpishInstr
 560         :: Instr -> Bool
 561
 562 x86_isJumpishInstr instr
 563  = case instr of
 564         JMP{}           -> True
 565         JXX{}           -> True
 566         JXX_GBL{}       -> True
 567         JMP_TBL{}       -> True
 568         CALL{}          -> True
 569         _               -> False
 570
 571
 572 x86_jumpDestsOfInstr
 573         :: Instr
 574         -> [BlockId]
 575
 576 x86_jumpDestsOfInstr insn
 577   = case insn of
 578         JXX _ id        -> [id]
 579         JMP_TBL _ ids   -> ids
 580         _               -> []
 581
 582
 583 x86_patchJumpInstr
 584         :: Instr -> (BlockId -> BlockId) -> Instr
 585
 586 x86_patchJumpInstr insn patchF
 587   = case insn of
 588         JXX cc id       -> JXX cc (patchF id)
 589         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 590         _               -> insn
 591
 592
 593
 594
 595 -- -----------------------------------------------------------------------------
 596 -- | Make a spill instruction.
 597 x86_mkSpillInstr
 598         :: Reg          -- register to spill
 599         -> Int          -- current stack delta
 600         -> Int          -- spill slot to use
 601         -> Instr
 602
 603 #if   i386_TARGET_ARCH
 604 x86_mkSpillInstr reg delta slot
 605   = let off     = spillSlotToOffset slot
 606     in
 607     let off_w = (off-delta) `div` 4
 608     in case targetClassOfReg reg of
 609            RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
 610            _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 611
 612 #elif x86_64_TARGET_ARCH
 613 x86_mkSpillInstr reg delta slot
 614   = let off     = spillSlotToOffset slot
 615     in
 616     let off_w = (off-delta) `div` 8
 617     in case targetClassOfReg reg of
 618            RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
 619            RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 620            _         -> panic "X86.mkSpillInstr: no match"
 621                 -- ToDo: will it work to always spill as a double?
 622                 -- does that cause a stall if the data was a float?
 623 #else
 624 x86_mkSpillInstr _ _ _
 625     =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
 626 #endif
 627
 628
 629 -- | Make a spill reload instruction.
 630 x86_mkLoadInstr
 631         :: Reg          -- register to load
 632         -> Int          -- current stack delta
 633         -> Int          -- spill slot to use
 634         -> Instr
 635
 636 #if   i386_TARGET_ARCH
 637 x86_mkLoadInstr reg delta slot
 638   = let off     = spillSlotToOffset slot
 639     in
 640         let off_w = (off-delta) `div` 4
 641         in case targetClassOfReg reg of {
 642               RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
 643               _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
 644 #elif x86_64_TARGET_ARCH
 645 x86_mkLoadInstr reg delta slot
 646   = let off     = spillSlotToOffset slot
 647     in
 648         let off_w = (off-delta) `div` 8
 649         in case targetClassOfReg reg of
 650               RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
 651               _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 652 #else
 653 x86_mkLoadInstr _ _ _
 654         = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
 655 #endif
 656
 657 spillSlotSize :: Int
 658 spillSlotSize = IF_ARCH_i386(12, 8)
 659
 660 maxSpillSlots :: Int
 661 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 662
 663 -- convert a spill slot number to a *byte* offset, with no sign:
 664 -- decide on a per arch basis whether you are spilling above or below
 665 -- the C stack pointer.
 666 spillSlotToOffset :: Int -> Int
 667 spillSlotToOffset slot
 668    | slot >= 0 && slot < maxSpillSlots
 669    = 64 + spillSlotSize * slot
 670    | otherwise
 671    = pprPanic "spillSlotToOffset:"
 672               (   text "invalid spill location: " <> int slot
 673               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 674
 675 --------------------------------------------------------------------------------
 676
 677 -- | See if this instruction is telling us the current C stack delta
 678 x86_takeDeltaInstr
 679         :: Instr
 680         -> Maybe Int
 681
 682 x86_takeDeltaInstr instr
 683  = case instr of
 684         DELTA i         -> Just i
 685         _               -> Nothing
 686
 687
 688 x86_isMetaInstr
 689         :: Instr
 690         -> Bool
 691
 692 x86_isMetaInstr instr
 693  = case instr of
 694         COMMENT{}       -> True
 695         LDATA{}         -> True
 696         NEWBLOCK{}      -> True
 697         DELTA{}         -> True
 698         _               -> False
 699
 700
 701
 702 -- | Make a reg-reg move instruction.
 703 --      On SPARC v8 there are no instructions to move directly between
 704 --      floating point and integer regs. If we need to do that then we
 705 --      have to go via memory.
 706 --
 707 x86_mkRegRegMoveInstr
 708         :: Reg
 709         -> Reg
 710         -> Instr
 711
 712 x86_mkRegRegMoveInstr src dst
 713  = case targetClassOfReg src of
 714 #if   i386_TARGET_ARCH
 715         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 716         RcDouble  -> GMOV src dst
 717         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 718 #else
 719         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 720         RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
 721         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 722 #endif
 723
 724
 725 -- | Check whether an instruction represents a reg-reg move.
 726 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 727 --      by assigning the src and dest temporaries to the same real register.
 728 --
 729 x86_takeRegRegMoveInstr
 730         :: Instr
 731         -> Maybe (Reg,Reg)
 732
 733 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 734         = Just (r1,r2)
 735
 736 x86_takeRegRegMoveInstr _  = Nothing
 737
 738
 739 -- | Make an unconditional branch instruction.
 740 x86_mkJumpInstr
 741         :: BlockId
 742         -> [Instr]
 743
 744 x86_mkJumpInstr id
 745         = [JXX ALWAYS id]
 746
 747
 748
 749
 750
 751 i386_insert_ffrees
 752         :: [GenBasicBlock Instr]
 753         -> [GenBasicBlock Instr]
 754
 755 i386_insert_ffrees blocks
 756    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 757    = map ffree_before_nonlocal_transfers blocks
 758
 759    | otherwise
 760    = blocks
 761   where
 762    ffree_before_nonlocal_transfers (BasicBlock id insns)
 763      = BasicBlock id (foldr p [] insns)
 764      where p insn r = case insn of
 765                         CALL _ _ -> GFREE : insn : r
 766                         JMP _    -> GFREE : insn : r
 767                         _        -> insn : r
 768
 769 -- if you ever add a new FP insn to the fake x86 FP insn set,
 770 -- you must update this too
 771 is_G_instr :: Instr -> Bool
 772 is_G_instr instr
 773    = case instr of
 774         GMOV{}          -> True
 775         GLD{}           -> True
 776         GST{}           -> True
 777         GLDZ{}          -> True
 778         GLD1{}          -> True
 779         GFTOI{}         -> True
 780         GDTOI{}         -> True
 781         GITOF{}         -> True
 782         GITOD{}         -> True
 783         GADD{}          -> True
 784         GDIV{}          -> True
 785         GSUB{}          -> True
 786         GMUL{}          -> True
 787         GCMP{}          -> True
 788         GABS{}          -> True
 789         GNEG{}          -> True
 790         GSQRT{}         -> True
 791         GSIN{}          -> True
 792         GCOS{}          -> True
 793         GTAN{}          -> True
 794         GFREE           -> panic "is_G_instr: GFREE (!)"
 795         _               -> False
 796
 797
 798 data JumpDest = DestBlockId BlockId | DestImm Imm
 799
 800
 801 canShortcut :: Instr -> Maybe JumpDest
 802 canShortcut (JXX ALWAYS id)     = Just (DestBlockId id)
 803 canShortcut (JMP (OpImm imm))   = Just (DestImm imm)
 804 canShortcut _                   = Nothing
 805
 806
 807 -- The helper ensures that we don't follow cycles.
 808 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 809 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 810   where shortcutJump' fn seen insn@(JXX cc id) =
 811           if elemBlockSet id seen then insn
 812           else case fn id of
 813                  Nothing                -> insn
 814                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 815                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 816                where seen' = extendBlockSet seen id
 817         shortcutJump' _ _ other = other
 818
 819 -- Here because it knows about JumpDest
 820 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 821 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 822   | Just uq <- maybeAsmTemp lab
 823   = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
 824 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 825   | Just uq <- maybeAsmTemp lbl1
 826   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
 827         -- slightly dodgy, we're ignoring the second label, but this
 828         -- works with the way we use CmmLabelDiffOff for jump tables now.
 829
 830 shortcutStatic _ other_static
 831         = other_static
 832
 833 shortBlockId
 834         :: (BlockId -> Maybe JumpDest)
 835         -> BlockId
 836         -> CLabel
 837
 838 shortBlockId fn blockid@(BlockId uq) =
 839    case fn blockid of
 840       Nothing -> mkAsmTempLabel uq
 841       Just (DestBlockId blockid')  -> shortBlockId fn blockid'
 842       Just (DestImm (ImmCLbl lbl)) -> lbl
 843       _other -> panic "shortBlockId"