compiler/nativeGen/X86/CodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module X86.CodeGen (
  15         cmmTopCodeGen,
  16         generateJumpTableForInstr,
  17         InstrBlock
  18 )
  19
  20 where
  21
  22 #include "HsVersions.h"
  23 #include "nativeGen/NCG.h"
  24 #include "../includes/MachDeps.h"
  25
  26 -- NCG stuff:
  27 import X86.Instr
  28 import X86.Cond
  29 import X86.Regs
  30 import X86.RegInfo
  31 import Instruction
  32 import PIC
  33 import NCGMonad
  34 import Size
  35 import Reg
  36 import Platform
  37
  38 -- Our intermediate code:
  39 import BasicTypes
  40 import BlockId
  41 import PprCmm           ()
  42 import OldCmm
  43 import OldPprCmm        ()
  44 import CLabel
  45
  46 -- The rest:
  47 import StaticFlags      ( opt_PIC )
  48 import ForeignCall      ( CCallConv(..) )
  49 import OrdList
  50 import Outputable
  51 import Unique
  52 import FastString
  53 import FastBool         ( isFastTrue )
  54 import Constants        ( wORD_SIZE )
  55 import DynFlags
  56
  57 import Control.Monad    ( mapAndUnzipM )
  58 import Data.Maybe       ( fromJust, catMaybes )
  59 import Data.Bits
  60 import Data.Word
  61 import Data.Int
  62
  63 sse2Enabled :: NatM Bool
  64 #if x86_64_TARGET_ARCH
  65 -- SSE2 is fixed on for x86_64.  It would be possible to make it optional,
  66 -- but we'd need to fix at least the foreign call code where the calling
  67 -- convention specifies the use of xmm regs, and possibly other places.
  68 sse2Enabled = return True
  69 #else
  70 sse2Enabled = do
  71   dflags <- getDynFlagsNat
  72   return (dopt Opt_SSE2 dflags)
  73 #endif
  74
  75 if_sse2 :: NatM a -> NatM a -> NatM a
  76 if_sse2 sse2 x87 = do
  77   b <- sse2Enabled
  78   if b then sse2 else x87
  79
  80 cmmTopCodeGen
  81         :: DynFlags
  82         -> RawCmmTop
  83         -> NatM [NatCmmTop Instr]
  84
  85 cmmTopCodeGen dynflags (CmmProc info lab (ListGraph blocks)) = do
  86   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  87   picBaseMb <- getPicBaseMaybeNat
  88   let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
  89       tops = proc : concat statics
  90       os   = platformOS $ targetPlatform dynflags
  91
  92   case picBaseMb of
  93       Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
  94       Nothing -> return tops
  95
  96 cmmTopCodeGen _ (CmmData sec dat) = do
  97   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  98
  99
 100 basicBlockCodeGen
 101         :: CmmBasicBlock
 102         -> NatM ( [NatBasicBlock Instr]
 103                 , [NatCmmTop Instr])
 104
 105 basicBlockCodeGen (BasicBlock id stmts) = do
 106   instrs <- stmtsToInstrs stmts
 107   -- code generation may introduce new basic block boundaries, which
 108   -- are indicated by the NEWBLOCK instruction.  We must split up the
 109   -- instruction stream into basic blocks again.  Also, we extract
 110   -- LDATAs here too.
 111   let
 112         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 113
 114         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 115           = ([], BasicBlock id instrs : blocks, statics)
 116         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 117           = (instrs, blocks, CmmData sec dat:statics)
 118         mkBlocks instr (instrs,blocks,statics)
 119           = (instr:instrs, blocks, statics)
 120   -- in
 121   return (BasicBlock id top : other_blocks, statics)
 122
 123
 124 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 125 stmtsToInstrs stmts
 126    = do instrss <- mapM stmtToInstrs stmts
 127         return (concatOL instrss)
 128
 129
 130 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 131 stmtToInstrs stmt = case stmt of
 132     CmmNop         -> return nilOL
 133     CmmComment s   -> return (unitOL (COMMENT s))
 134
 135     CmmAssign reg src
 136       | isFloatType ty -> assignReg_FltCode size reg src
 137 #if WORD_SIZE_IN_BITS==32
 138       | isWord64 ty    -> assignReg_I64Code      reg src
 139 #endif
 140       | otherwise        -> assignReg_IntCode size reg src
 141         where ty = cmmRegType reg
 142               size = cmmTypeSize ty
 143
 144     CmmStore addr src
 145       | isFloatType ty -> assignMem_FltCode size addr src
 146 #if WORD_SIZE_IN_BITS==32
 147       | isWord64 ty      -> assignMem_I64Code      addr src
 148 #endif
 149       | otherwise        -> assignMem_IntCode size addr src
 150         where ty = cmmExprType src
 151               size = cmmTypeSize ty
 152
 153     CmmCall target result_regs args _ _
 154        -> genCCall target result_regs args
 155
 156     CmmBranch id          -> genBranch id
 157     CmmCondBranch arg id  -> genCondJump id arg
 158     CmmSwitch arg ids     -> genSwitch arg ids
 159     CmmJump arg _         -> genJump arg
 160     CmmReturn _           ->
 161       panic "stmtToInstrs: return statement should have been cps'd away"
 162
 163
 164 --------------------------------------------------------------------------------
 165 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 166 --      They are really trees of insns to facilitate fast appending, where a
 167 --      left-to-right traversal yields the insns in the correct order.
 168 --
 169 type InstrBlock
 170         = OrdList Instr
 171
 172
 173 -- | Condition codes passed up the tree.
 174 --
 175 data CondCode
 176         = CondCode Bool Cond InstrBlock
 177
 178
 179 -- | a.k.a "Register64"
 180 --      Reg is the lower 32-bit temporary which contains the result.
 181 --      Use getHiVRegFromLo to find the other VRegUnique.
 182 --
 183 --      Rules of this simplified insn selection game are therefore that
 184 --      the returned Reg may be modified
 185 --
 186 data ChildCode64
 187    = ChildCode64
 188         InstrBlock
 189         Reg
 190
 191
 192 -- | Register's passed up the tree.  If the stix code forces the register
 193 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 194 --      otherwise, it comes out as @Any@, and the parent can decide which
 195 --      register to put it in.
 196 --
 197 data Register
 198         = Fixed Size Reg InstrBlock
 199         | Any   Size (Reg -> InstrBlock)
 200
 201
 202 swizzleRegisterRep :: Register -> Size -> Register
 203 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 204 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 205
 206
 207 -- | Grab the Reg for a CmmReg
 208 getRegisterReg :: Bool -> CmmReg -> Reg
 209
 210 getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
 211   = let sz = cmmTypeSize pk in
 212     if isFloatSize sz && not use_sse2
 213        then RegVirtual (mkVirtualReg u FF80)
 214        else RegVirtual (mkVirtualReg u sz)
 215
 216 getRegisterReg _ (CmmGlobal mid)
 217   = case globalRegMaybe mid of
 218         Just reg -> RegReal $ reg
 219         Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 220         -- By this stage, the only MagicIds remaining should be the
 221         -- ones which map to a real machine register on this
 222         -- platform.  Hence ...
 223
 224
 225 -- | Memory addressing modes passed up the tree.
 226 data Amode
 227         = Amode AddrMode InstrBlock
 228
 229 {-
 230 Now, given a tree (the argument to an CmmLoad) that references memory,
 231 produce a suitable addressing mode.
 232
 233 A Rule of the Game (tm) for Amodes: use of the addr bit must
 234 immediately follow use of the code part, since the code part puts
 235 values in registers which the addr then refers to.  So you can't put
 236 anything in between, lest it overwrite some of those registers.  If
 237 you need to do some other computation between the code part and use of
 238 the addr bit, first store the effective address from the amode in a
 239 temporary, then do the other computation, and then use the temporary:
 240
 241     code
 242     LEA amode, tmp
 243     ... other computation ...
 244     ... (tmp) ...
 245 -}
 246
 247
 248 -- | Check whether an integer will fit in 32 bits.
 249 --      A CmmInt is intended to be truncated to the appropriate
 250 --      number of bits, so here we truncate it to Int64.  This is
 251 --      important because e.g. -1 as a CmmInt might be either
 252 --      -1 or 18446744073709551615.
 253 --
 254 is32BitInteger :: Integer -> Bool
 255 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 256   where i64 = fromIntegral i :: Int64
 257
 258
 259 -- | Convert a BlockId to some CmmStatic data
 260 jumpTableEntry :: Maybe BlockId -> CmmStatic
 261 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 262 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
 263     where blockLabel = mkAsmTempLabel (getUnique blockid)
 264
 265
 266 -- -----------------------------------------------------------------------------
 267 -- General things for putting together code sequences
 268
 269 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 270 -- CmmExprs into CmmRegOff?
 271 mangleIndexTree :: CmmReg -> Int -> CmmExpr
 272 mangleIndexTree reg off
 273   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 274   where width = typeWidth (cmmRegType reg)
 275
 276 -- | The dual to getAnyReg: compute an expression into a register, but
 277 --      we don't mind which one it is.
 278 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 279 getSomeReg expr = do
 280   r <- getRegister expr
 281   case r of
 282     Any rep code -> do
 283         tmp <- getNewRegNat rep
 284         return (tmp, code tmp)
 285     Fixed _ reg code ->
 286         return (reg, code)
 287
 288
 289
 290
 291
 292 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 293 assignMem_I64Code addrTree valueTree = do
 294   Amode addr addr_code <- getAmode addrTree
 295   ChildCode64 vcode rlo <- iselExpr64 valueTree
 296   let
 297         rhi = getHiVRegFromLo rlo
 298
 299         -- Little-endian store
 300         mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
 301         mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 302   -- in
 303   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 304
 305
 306 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 307 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
 308    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 309    let
 310          r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
 311          r_dst_hi = getHiVRegFromLo r_dst_lo
 312          r_src_hi = getHiVRegFromLo r_src_lo
 313          mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
 314          mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
 315    -- in
 316    return (
 317         vcode `snocOL` mov_lo `snocOL` mov_hi
 318      )
 319
 320 assignReg_I64Code _ _
 321    = panic "assignReg_I64Code(i386): invalid lvalue"
 322
 323
 324
 325
 326 iselExpr64        :: CmmExpr -> NatM ChildCode64
 327 iselExpr64 (CmmLit (CmmInt i _)) = do
 328   (rlo,rhi) <- getNewRegPairNat II32
 329   let
 330         r = fromIntegral (fromIntegral i :: Word32)
 331         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 332         code = toOL [
 333                 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
 334                 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
 335                 ]
 336   -- in
 337   return (ChildCode64 code rlo)
 338
 339 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 340    Amode addr addr_code <- getAmode addrTree
 341    (rlo,rhi) <- getNewRegPairNat II32
 342    let
 343         mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
 344         mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 345    -- in
 346    return (
 347             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 348                         rlo
 349      )
 350
 351 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 352    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
 353
 354 -- we handle addition, but rather badly
 355 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 356    ChildCode64 code1 r1lo <- iselExpr64 e1
 357    (rlo,rhi) <- getNewRegPairNat II32
 358    let
 359         r = fromIntegral (fromIntegral i :: Word32)
 360         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 361         r1hi = getHiVRegFromLo r1lo
 362         code =  code1 `appOL`
 363                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 364                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
 365                        MOV II32 (OpReg r1hi) (OpReg rhi),
 366                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 367    -- in
 368    return (ChildCode64 code rlo)
 369
 370 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 371    ChildCode64 code1 r1lo <- iselExpr64 e1
 372    ChildCode64 code2 r2lo <- iselExpr64 e2
 373    (rlo,rhi) <- getNewRegPairNat II32
 374    let
 375         r1hi = getHiVRegFromLo r1lo
 376         r2hi = getHiVRegFromLo r2lo
 377         code =  code1 `appOL`
 378                 code2 `appOL`
 379                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 380                        ADD II32 (OpReg r2lo) (OpReg rlo),
 381                        MOV II32 (OpReg r1hi) (OpReg rhi),
 382                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
 383    -- in
 384    return (ChildCode64 code rlo)
 385
 386 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
 387      fn <- getAnyReg expr
 388      r_dst_lo <-  getNewRegNat II32
 389      let r_dst_hi = getHiVRegFromLo r_dst_lo
 390          code = fn r_dst_lo
 391      return (
 392              ChildCode64 (code `snocOL`
 393                           MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 394                           r_dst_lo
 395             )
 396
 397 iselExpr64 expr
 398    = pprPanic "iselExpr64(i386)" (ppr expr)
 399
 400
 401
 402 --------------------------------------------------------------------------------
 403 getRegister :: CmmExpr -> NatM Register
 404
 405 #if !x86_64_TARGET_ARCH
 406     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 407     -- register, it can only be used for rip-relative addressing.
 408 getRegister (CmmReg (CmmGlobal PicBaseReg))
 409   = do
 410       reg <- getPicBaseNat archWordSize
 411       return (Fixed archWordSize reg nilOL)
 412 #endif
 413
 414 getRegister (CmmReg reg)
 415   = do use_sse2 <- sse2Enabled
 416        let
 417          sz = cmmTypeSize (cmmRegType reg)
 418          size | not use_sse2 && isFloatSize sz = FF80
 419               | otherwise                      = sz
 420        --
 421        return (Fixed size (getRegisterReg use_sse2 reg) nilOL)
 422
 423
 424 getRegister (CmmRegOff r n)
 425   = getRegister $ mangleIndexTree r n
 426
 427
 428 #if WORD_SIZE_IN_BITS==32
 429     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 430     -- TO_W_(x), TO_W_(x >> 32)
 431
 432 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 433              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 434   ChildCode64 code rlo <- iselExpr64 x
 435   return $ Fixed II32 (getHiVRegFromLo rlo) code
 436
 437 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 438              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 439   ChildCode64 code rlo <- iselExpr64 x
 440   return $ Fixed II32 (getHiVRegFromLo rlo) code
 441
 442 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 443   ChildCode64 code rlo <- iselExpr64 x
 444   return $ Fixed II32 rlo code
 445
 446 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 447   ChildCode64 code rlo <- iselExpr64 x
 448   return $ Fixed II32 rlo code
 449
 450 #endif
 451
 452
 453 getRegister (CmmLit lit@(CmmFloat f w)) =
 454   if_sse2 float_const_sse2 float_const_x87
 455  where
 456   float_const_sse2
 457     | f == 0.0 = do
 458       let
 459           size = floatSize w
 460           code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
 461         -- I don't know why there are xorpd, xorps, and pxor instructions.
 462         -- They all appear to do the same thing --SDM
 463       return (Any size code)
 464
 465    | otherwise = do
 466       Amode addr code <- memConstant (widthInBytes w) lit
 467       loadFloatAmode True w addr code
 468
 469   float_const_x87 = case w of
 470     W64
 471       | f == 0.0 ->
 472         let code dst = unitOL (GLDZ dst)
 473         in  return (Any FF80 code)
 474
 475       | f == 1.0 ->
 476         let code dst = unitOL (GLD1 dst)
 477         in  return (Any FF80 code)
 478
 479     _otherwise -> do
 480       Amode addr code <- memConstant (widthInBytes w) lit
 481       loadFloatAmode False w addr code
 482
 483 -- catch simple cases of zero- or sign-extended load
 484 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
 485   code <- intLoadCode (MOVZxL II8) addr
 486   return (Any II32 code)
 487
 488 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
 489   code <- intLoadCode (MOVSxL II8) addr
 490   return (Any II32 code)
 491
 492 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
 493   code <- intLoadCode (MOVZxL II16) addr
 494   return (Any II32 code)
 495
 496 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
 497   code <- intLoadCode (MOVSxL II16) addr
 498   return (Any II32 code)
 499
 500
 501 #if x86_64_TARGET_ARCH
 502
 503 -- catch simple cases of zero- or sign-extended load
 504 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
 505   code <- intLoadCode (MOVZxL II8) addr
 506   return (Any II64 code)
 507
 508 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
 509   code <- intLoadCode (MOVSxL II8) addr
 510   return (Any II64 code)
 511
 512 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
 513   code <- intLoadCode (MOVZxL II16) addr
 514   return (Any II64 code)
 515
 516 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
 517   code <- intLoadCode (MOVSxL II16) addr
 518   return (Any II64 code)
 519
 520 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
 521   code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
 522   return (Any II64 code)
 523
 524 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
 525   code <- intLoadCode (MOVSxL II32) addr
 526   return (Any II64 code)
 527
 528 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 529                                      CmmLit displacement])
 530     = return $ Any II64 (\dst -> unitOL $
 531         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 532
 533 #endif /* x86_64_TARGET_ARCH */
 534
 535
 536
 537
 538
 539 getRegister (CmmMachOp mop [x]) = do -- unary MachOps
 540     sse2 <- sse2Enabled
 541     case mop of
 542       MO_F_Neg w
 543          | sse2      -> sse2NegCode w x
 544          | otherwise -> trivialUFCode FF80 (GNEG FF80) x
 545
 546       MO_S_Neg w -> triv_ucode NEGI (intSize w)
 547       MO_Not w   -> triv_ucode NOT  (intSize w)
 548
 549       -- Nop conversions
 550       MO_UU_Conv W32 W8  -> toI8Reg  W32 x
 551       MO_SS_Conv W32 W8  -> toI8Reg  W32 x
 552       MO_UU_Conv W16 W8  -> toI8Reg  W16 x
 553       MO_SS_Conv W16 W8  -> toI8Reg  W16 x
 554       MO_UU_Conv W32 W16 -> toI16Reg W32 x
 555       MO_SS_Conv W32 W16 -> toI16Reg W32 x
 556
 557 #if x86_64_TARGET_ARCH
 558       MO_UU_Conv W64 W32 -> conversionNop II64 x
 559       MO_SS_Conv W64 W32 -> conversionNop II64 x
 560       MO_UU_Conv W64 W16 -> toI16Reg W64 x
 561       MO_SS_Conv W64 W16 -> toI16Reg W64 x
 562       MO_UU_Conv W64 W8  -> toI8Reg  W64 x
 563       MO_SS_Conv W64 W8  -> toI8Reg  W64 x
 564 #endif
 565
 566       MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 567       MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 568
 569       -- widenings
 570       MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
 571       MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
 572       MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
 573
 574       MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
 575       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
 576       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
 577
 578 #if x86_64_TARGET_ARCH
 579       MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
 580       MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
 581       MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
 582       MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
 583       MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
 584       MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
 585         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 586         -- However, we don't want the register allocator to throw it
 587         -- away as an unnecessary reg-to-reg move, so we keep it in
 588         -- the form of a movzl and print it as a movl later.
 589 #endif
 590
 591       MO_FF_Conv W32 W64
 592         | sse2      -> coerceFP2FP W64 x
 593         | otherwise -> conversionNop FF80 x
 594
 595       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
 596
 597       MO_FS_Conv from to -> coerceFP2Int from to x
 598       MO_SF_Conv from to -> coerceInt2FP from to x
 599
 600       _other -> pprPanic "getRegister" (pprMachOp mop)
 601    where
 602         triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
 603         triv_ucode instr size = trivialUCode size (instr size) x
 604
 605         -- signed or unsigned extension.
 606         integerExtend :: Width -> Width
 607                       -> (Size -> Operand -> Operand -> Instr)
 608                       -> CmmExpr -> NatM Register
 609         integerExtend from to instr expr = do
 610             (reg,e_code) <- if from == W8 then getByteReg expr
 611                                           else getSomeReg expr
 612             let
 613                 code dst =
 614                   e_code `snocOL`
 615                   instr (intSize from) (OpReg reg) (OpReg dst)
 616             return (Any (intSize to) code)
 617
 618         toI8Reg :: Width -> CmmExpr -> NatM Register
 619         toI8Reg new_rep expr
 620             = do codefn <- getAnyReg expr
 621                  return (Any (intSize new_rep) codefn)
 622                 -- HACK: use getAnyReg to get a byte-addressable register.
 623                 -- If the source was a Fixed register, this will add the
 624                 -- mov instruction to put it into the desired destination.
 625                 -- We're assuming that the destination won't be a fixed
 626                 -- non-byte-addressable register; it won't be, because all
 627                 -- fixed registers are word-sized.
 628
 629         toI16Reg = toI8Reg -- for now
 630
 631         conversionNop :: Size -> CmmExpr -> NatM Register
 632         conversionNop new_size expr
 633             = do e_code <- getRegister expr
 634                  return (swizzleRegisterRep e_code new_size)
 635
 636
 637 getRegister (CmmMachOp mop [x, y]) = do -- dyadic MachOps
 638   sse2 <- sse2Enabled
 639   case mop of
 640       MO_F_Eq _ -> condFltReg EQQ x y
 641       MO_F_Ne _ -> condFltReg NE  x y
 642       MO_F_Gt _ -> condFltReg GTT x y
 643       MO_F_Ge _ -> condFltReg GE  x y
 644       MO_F_Lt _ -> condFltReg LTT x y
 645       MO_F_Le _ -> condFltReg LE  x y
 646
 647       MO_Eq _   -> condIntReg EQQ x y
 648       MO_Ne _   -> condIntReg NE  x y
 649
 650       MO_S_Gt _ -> condIntReg GTT x y
 651       MO_S_Ge _ -> condIntReg GE  x y
 652       MO_S_Lt _ -> condIntReg LTT x y
 653       MO_S_Le _ -> condIntReg LE  x y
 654
 655       MO_U_Gt _ -> condIntReg GU  x y
 656       MO_U_Ge _ -> condIntReg GEU x y
 657       MO_U_Lt _ -> condIntReg LU  x y
 658       MO_U_Le _ -> condIntReg LEU x y
 659
 660       MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
 661                   | otherwise -> trivialFCode_x87    GADD x y
 662       MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
 663                   | otherwise -> trivialFCode_x87    GSUB x y
 664       MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
 665                   | otherwise -> trivialFCode_x87    GDIV x y
 666       MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
 667                   | otherwise -> trivialFCode_x87    GMUL x y
 668
 669       MO_Add rep -> add_code rep x y
 670       MO_Sub rep -> sub_code rep x y
 671
 672       MO_S_Quot rep -> div_code rep True  True  x y
 673       MO_S_Rem  rep -> div_code rep True  False x y
 674       MO_U_Quot rep -> div_code rep False True  x y
 675       MO_U_Rem  rep -> div_code rep False False x y
 676
 677       MO_S_MulMayOflo rep -> imulMayOflo rep x y
 678
 679       MO_Mul rep -> triv_op rep IMUL
 680       MO_And rep -> triv_op rep AND
 681       MO_Or  rep -> triv_op rep OR
 682       MO_Xor rep -> triv_op rep XOR
 683
 684         {- Shift ops on x86s have constraints on their source, it
 685            either has to be Imm, CL or 1
 686             => trivialCode is not restrictive enough (sigh.)
 687         -}
 688       MO_Shl rep   -> shift_code rep SHL x y {-False-}
 689       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
 690       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
 691
 692       _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
 693   where
 694     --------------------
 695     triv_op width instr = trivialCode width op (Just op) x y
 696                         where op   = instr (intSize width)
 697
 698     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
 699     imulMayOflo rep a b = do
 700          (a_reg, a_code) <- getNonClobberedReg a
 701          b_code <- getAnyReg b
 702          let
 703              shift_amt  = case rep of
 704                            W32 -> 31
 705                            W64 -> 63
 706                            _ -> panic "shift_amt"
 707
 708              size = intSize rep
 709              code = a_code `appOL` b_code eax `appOL`
 710                         toOL [
 711                            IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
 712                            SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
 713                                 -- sign extend lower part
 714                            SUB size (OpReg edx) (OpReg eax)
 715                                 -- compare against upper
 716                            -- eax==0 if high part == sign extended low part
 717                         ]
 718          -- in
 719          return (Fixed size eax code)
 720
 721     --------------------
 722     shift_code :: Width
 723                -> (Size -> Operand -> Operand -> Instr)
 724                -> CmmExpr
 725                -> CmmExpr
 726                -> NatM Register
 727
 728     {- Case1: shift length as immediate -}
 729     shift_code width instr x (CmmLit lit) = do
 730           x_code <- getAnyReg x
 731           let
 732                size = intSize width
 733                code dst
 734                   = x_code dst `snocOL`
 735                     instr size (OpImm (litToImm lit)) (OpReg dst)
 736           -- in
 737           return (Any size code)
 738
 739     {- Case2: shift length is complex (non-immediate)
 740       * y must go in %ecx.
 741       * we cannot do y first *and* put its result in %ecx, because
 742         %ecx might be clobbered by x.
 743       * if we do y second, then x cannot be
 744         in a clobbered reg.  Also, we cannot clobber x's reg
 745         with the instruction itself.
 746       * so we can either:
 747         - do y first, put its result in a fresh tmp, then copy it to %ecx later
 748         - do y second and put its result into %ecx.  x gets placed in a fresh
 749           tmp.  This is likely to be better, becuase the reg alloc can
 750           eliminate this reg->reg move here (it won't eliminate the other one,
 751           because the move is into the fixed %ecx).
 752     -}
 753     shift_code width instr x y{-amount-} = do
 754         x_code <- getAnyReg x
 755         let size = intSize width
 756         tmp <- getNewRegNat size
 757         y_code <- getAnyReg y
 758         let
 759            code = x_code tmp `appOL`
 760                   y_code ecx `snocOL`
 761                   instr size (OpReg ecx) (OpReg tmp)
 762         -- in
 763         return (Fixed size tmp code)
 764
 765     --------------------
 766     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
 767     add_code rep x (CmmLit (CmmInt y _))
 768         | is32BitInteger y = add_int rep x y
 769     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
 770       where size = intSize rep
 771
 772     --------------------
 773     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
 774     sub_code rep x (CmmLit (CmmInt y _))
 775         | is32BitInteger (-y) = add_int rep x (-y)
 776     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
 777
 778     -- our three-operand add instruction:
 779     add_int width x y = do
 780         (x_reg, x_code) <- getSomeReg x
 781         let
 782             size = intSize width
 783             imm = ImmInt (fromInteger y)
 784             code dst
 785                = x_code `snocOL`
 786                  LEA size
 787                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
 788                         (OpReg dst)
 789         --
 790         return (Any size code)
 791
 792     ----------------------
 793     div_code width signed quotient x y = do
 794            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
 795            x_code <- getAnyReg x
 796            let
 797              size = intSize width
 798              widen | signed    = CLTD size
 799                    | otherwise = XOR size (OpReg edx) (OpReg edx)
 800
 801              instr | signed    = IDIV
 802                    | otherwise = DIV
 803
 804              code = y_code `appOL`
 805                     x_code eax `appOL`
 806                     toOL [widen, instr size y_op]
 807
 808              result | quotient  = eax
 809                     | otherwise = edx
 810
 811            -- in
 812            return (Fixed size result code)
 813
 814
 815 getRegister (CmmLoad mem pk)
 816   | isFloatType pk
 817   = do
 818     Amode addr mem_code <- getAmode mem
 819     use_sse2 <- sse2Enabled
 820     loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
 821
 822 #if i386_TARGET_ARCH
 823 getRegister (CmmLoad mem pk)
 824   | not (isWord64 pk)
 825   = do
 826     code <- intLoadCode instr mem
 827     return (Any size code)
 828   where
 829     width = typeWidth pk
 830     size = intSize width
 831     instr = case width of
 832                 W8     -> MOVZxL II8
 833                 _other -> MOV size
 834         -- We always zero-extend 8-bit loads, if we
 835         -- can't think of anything better.  This is because
 836         -- we can't guarantee access to an 8-bit variant of every register
 837         -- (esi and edi don't have 8-bit variants), so to make things
 838         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
 839 #endif
 840
 841 #if x86_64_TARGET_ARCH
 842 -- Simpler memory load code on x86_64
 843 getRegister (CmmLoad mem pk)
 844   = do
 845     code <- intLoadCode (MOV size) mem
 846     return (Any size code)
 847   where size = intSize $ typeWidth pk
 848 #endif
 849
 850 getRegister (CmmLit (CmmInt 0 width))
 851   = let
 852         size = intSize width
 853
 854         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
 855         size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size )
 856         code dst
 857            = unitOL (XOR size1 (OpReg dst) (OpReg dst))
 858     in
 859         return (Any size code)
 860
 861 #if x86_64_TARGET_ARCH
 862   -- optimisation for loading small literals on x86_64: take advantage
 863   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
 864   -- instruction forms are shorter.
 865 getRegister (CmmLit lit)
 866   | isWord64 (cmmLitType lit), not (isBigLit lit)
 867   = let
 868         imm = litToImm lit
 869         code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
 870     in
 871         return (Any II64 code)
 872   where
 873    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
 874    isBigLit _ = False
 875         -- note1: not the same as (not.is32BitLit), because that checks for
 876         -- signed literals that fit in 32 bits, but we want unsigned
 877         -- literals here.
 878         -- note2: all labels are small, because we're assuming the
 879         -- small memory model (see gcc docs, -mcmodel=small).
 880 #endif
 881
 882 getRegister (CmmLit lit)
 883   = let
 884         size = cmmTypeSize (cmmLitType lit)
 885         imm = litToImm lit
 886         code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
 887     in
 888         return (Any size code)
 889
 890 getRegister other = pprPanic "getRegister(x86)" (ppr other)
 891
 892
 893 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
 894    -> NatM (Reg -> InstrBlock)
 895 intLoadCode instr mem = do
 896   Amode src mem_code <- getAmode mem
 897   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
 898
 899 -- Compute an expression into *any* register, adding the appropriate
 900 -- move instruction if necessary.
 901 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
 902 getAnyReg expr = do
 903   r <- getRegister expr
 904   anyReg r
 905
 906 anyReg :: Register -> NatM (Reg -> InstrBlock)
 907 anyReg (Any _ code)          = return code
 908 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
 909
 910 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
 911 -- Fixed registers might not be byte-addressable, so we make sure we've
 912 -- got a temporary, inserting an extra reg copy if necessary.
 913 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
 914 #if x86_64_TARGET_ARCH
 915 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
 916 #else
 917 getByteReg expr = do
 918   r <- getRegister expr
 919   case r of
 920     Any rep code -> do
 921         tmp <- getNewRegNat rep
 922         return (tmp, code tmp)
 923     Fixed rep reg code
 924         | isVirtualReg reg -> return (reg,code)
 925         | otherwise -> do
 926             tmp <- getNewRegNat rep
 927             return (tmp, code `snocOL` reg2reg rep reg tmp)
 928         -- ToDo: could optimise slightly by checking for byte-addressable
 929         -- real registers, but that will happen very rarely if at all.
 930 #endif
 931
 932 -- Another variant: this time we want the result in a register that cannot
 933 -- be modified by code to evaluate an arbitrary expression.
 934 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
 935 getNonClobberedReg expr = do
 936   r <- getRegister expr
 937   case r of
 938     Any rep code -> do
 939         tmp <- getNewRegNat rep
 940         return (tmp, code tmp)
 941     Fixed rep reg code
 942         -- only free regs can be clobbered
 943         | RegReal (RealRegSingle rr) <- reg
 944         , isFastTrue (freeReg rr)
 945         -> do
 946                 tmp <- getNewRegNat rep
 947                 return (tmp, code `snocOL` reg2reg rep reg tmp)
 948         | otherwise ->
 949                 return (reg, code)
 950
 951 reg2reg :: Size -> Reg -> Reg -> Instr
 952 reg2reg size src dst
 953   | size == FF80 = GMOV src dst
 954   | otherwise    = MOV size (OpReg src) (OpReg dst)
 955
 956
 957 --------------------------------------------------------------------------------
 958 getAmode :: CmmExpr -> NatM Amode
 959 getAmode (CmmRegOff r n) = getAmode $ mangleIndexTree r n
 960
 961 #if x86_64_TARGET_ARCH
 962
 963 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 964                                      CmmLit displacement])
 965     = return $ Amode (ripRel (litToImm displacement)) nilOL
 966
 967 #endif
 968
 969
 970 -- This is all just ridiculous, since it carefully undoes
 971 -- what mangleIndexTree has just done.
 972 getAmode (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
 973   | is32BitLit lit
 974   -- ASSERT(rep == II32)???
 975   = do (x_reg, x_code) <- getSomeReg x
 976        let off = ImmInt (-(fromInteger i))
 977        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 978
 979 getAmode (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
 980   | is32BitLit lit
 981   -- ASSERT(rep == II32)???
 982   = do (x_reg, x_code) <- getSomeReg x
 983        let off = litToImm lit
 984        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 985
 986 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
 987 -- recognised by the next rule.
 988 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
 989                                   b@(CmmLit _)])
 990   = getAmode (CmmMachOp (MO_Add rep) [b,a])
 991
 992 getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
 993                                         [y, CmmLit (CmmInt shift _)]])
 994   | shift == 0 || shift == 1 || shift == 2 || shift == 3
 995   = x86_complex_amode x y shift 0
 996
 997 getAmode (CmmMachOp (MO_Add _)
 998                 [x, CmmMachOp (MO_Add _)
 999                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1000                          CmmLit (CmmInt offset _)]])
1001   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1002   && is32BitInteger offset
1003   = x86_complex_amode x y shift offset
1004
1005 getAmode (CmmMachOp (MO_Add _) [x,y])
1006   = x86_complex_amode x y 0 0
1007
1008 getAmode (CmmLit lit) | is32BitLit lit
1009   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1010
1011 getAmode expr = do
1012   (reg,code) <- getSomeReg expr
1013   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1014
1015
1016 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1017 x86_complex_amode base index shift offset
1018   = do (x_reg, x_code) <- getNonClobberedReg base
1019         -- x must be in a temp, because it has to stay live over y_code
1020         -- we could compre x_reg and y_reg and do something better here...
1021        (y_reg, y_code) <- getSomeReg index
1022        let
1023            code = x_code `appOL` y_code
1024            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
1025                                 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
1026        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1027                code)
1028
1029
1030
1031
1032 -- -----------------------------------------------------------------------------
1033 -- getOperand: sometimes any operand will do.
1034
1035 -- getNonClobberedOperand: the value of the operand will remain valid across
1036 -- the computation of an arbitrary expression, unless the expression
1037 -- is computed directly into a register which the operand refers to
1038 -- (see trivialCode where this function is used for an example).
1039
1040 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1041 getNonClobberedOperand (CmmLit lit) = do
1042   use_sse2 <- sse2Enabled
1043   if use_sse2 && isSuitableFloatingPointLit lit
1044     then do
1045       let CmmFloat _ w = lit
1046       Amode addr code <- memConstant (widthInBytes w) lit
1047       return (OpAddr addr, code)
1048      else do
1049
1050   if is32BitLit lit && not (isFloatType (cmmLitType lit))
1051     then return (OpImm (litToImm lit), nilOL)
1052     else getNonClobberedOperand_generic (CmmLit lit)
1053
1054 getNonClobberedOperand (CmmLoad mem pk) = do
1055   use_sse2 <- sse2Enabled
1056   if (not (isFloatType pk) || use_sse2)
1057       && IF_ARCH_i386(not (isWord64 pk), True)
1058     then do
1059       Amode src mem_code <- getAmode mem
1060       (src',save_code) <-
1061         if (amodeCouldBeClobbered src)
1062                 then do
1063                    tmp <- getNewRegNat archWordSize
1064                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1065                            unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1066                 else
1067                    return (src, nilOL)
1068       return (OpAddr src', save_code `appOL` mem_code)
1069     else do
1070       getNonClobberedOperand_generic (CmmLoad mem pk)
1071
1072 getNonClobberedOperand e = getNonClobberedOperand_generic e
1073
1074 getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1075 getNonClobberedOperand_generic e = do
1076     (reg, code) <- getNonClobberedReg e
1077     return (OpReg reg, code)
1078
1079 amodeCouldBeClobbered :: AddrMode -> Bool
1080 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1081
1082 regClobbered :: Reg -> Bool
1083 regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
1084 regClobbered _ = False
1085
1086 -- getOperand: the operand is not required to remain valid across the
1087 -- computation of an arbitrary expression.
1088 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1089
1090 getOperand (CmmLit lit) = do
1091   use_sse2 <- sse2Enabled
1092   if (use_sse2 && isSuitableFloatingPointLit lit)
1093     then do
1094       let CmmFloat _ w = lit
1095       Amode addr code <- memConstant (widthInBytes w) lit
1096       return (OpAddr addr, code)
1097     else do
1098
1099   if is32BitLit lit && not (isFloatType (cmmLitType lit))
1100     then return (OpImm (litToImm lit), nilOL)
1101     else getOperand_generic (CmmLit lit)
1102
1103 getOperand (CmmLoad mem pk) = do
1104   use_sse2 <- sse2Enabled
1105   if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1106      then do
1107        Amode src mem_code <- getAmode mem
1108        return (OpAddr src, mem_code)
1109      else
1110        getOperand_generic (CmmLoad mem pk)
1111
1112 getOperand e = getOperand_generic e
1113
1114 getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1115 getOperand_generic e = do
1116     (reg, code) <- getSomeReg e
1117     return (OpReg reg, code)
1118
1119 isOperand :: CmmExpr -> Bool
1120 isOperand (CmmLoad _ _) = True
1121 isOperand (CmmLit lit)  = is32BitLit lit
1122                           || isSuitableFloatingPointLit lit
1123 isOperand _             = False
1124
1125 memConstant :: Int -> CmmLit -> NatM Amode
1126 memConstant align lit = do
1127 #ifdef x86_64_TARGET_ARCH
1128   lbl <- getNewLabelNat
1129   let addr = ripRel (ImmCLbl lbl)
1130       addr_code = nilOL
1131 #else
1132   lbl <- getNewLabelNat
1133   dflags <- getDynFlagsNat
1134   dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1135   Amode addr addr_code <- getAmode dynRef
1136 #endif
1137   let code =
1138         LDATA ReadOnlyData
1139                 [CmmAlign align,
1140                  CmmDataLabel lbl,
1141                  CmmStaticLit lit]
1142         `consOL` addr_code
1143   return (Amode addr code)
1144
1145
1146 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1147 loadFloatAmode use_sse2 w addr addr_code = do
1148   let size = floatSize w
1149       code dst = addr_code `snocOL`
1150                  if use_sse2
1151                     then MOV size (OpAddr addr) (OpReg dst)
1152                     else GLD size addr dst
1153   -- in
1154   return (Any (if use_sse2 then size else FF80) code)
1155
1156
1157 -- if we want a floating-point literal as an operand, we can
1158 -- use it directly from memory.  However, if the literal is
1159 -- zero, we're better off generating it into a register using
1160 -- xor.
1161 isSuitableFloatingPointLit :: CmmLit -> Bool
1162 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1163 isSuitableFloatingPointLit _ = False
1164
1165 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1166 getRegOrMem e@(CmmLoad mem pk) = do
1167   use_sse2 <- sse2Enabled
1168   if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1169      then do
1170        Amode src mem_code <- getAmode mem
1171        return (OpAddr src, mem_code)
1172      else do
1173        (reg, code) <- getNonClobberedReg e
1174        return (OpReg reg, code)
1175 getRegOrMem e = do
1176     (reg, code) <- getNonClobberedReg e
1177     return (OpReg reg, code)
1178
1179 is32BitLit :: CmmLit -> Bool
1180 #if x86_64_TARGET_ARCH
1181 is32BitLit (CmmInt i W64) = is32BitInteger i
1182    -- assume that labels are in the range 0-2^31-1: this assumes the
1183    -- small memory model (see gcc docs, -mcmodel=small).
1184 #endif
1185 is32BitLit _ = True
1186
1187
1188
1189
1190 -- Set up a condition code for a conditional branch.
1191
1192 getCondCode :: CmmExpr -> NatM CondCode
1193
1194 -- yes, they really do seem to want exactly the same!
1195
1196 getCondCode (CmmMachOp mop [x, y])
1197   =
1198     case mop of
1199       MO_F_Eq W32 -> condFltCode EQQ x y
1200       MO_F_Ne W32 -> condFltCode NE  x y
1201       MO_F_Gt W32 -> condFltCode GTT x y
1202       MO_F_Ge W32 -> condFltCode GE  x y
1203       MO_F_Lt W32 -> condFltCode LTT x y
1204       MO_F_Le W32 -> condFltCode LE  x y
1205
1206       MO_F_Eq W64 -> condFltCode EQQ x y
1207       MO_F_Ne W64 -> condFltCode NE  x y
1208       MO_F_Gt W64 -> condFltCode GTT x y
1209       MO_F_Ge W64 -> condFltCode GE  x y
1210       MO_F_Lt W64 -> condFltCode LTT x y
1211       MO_F_Le W64 -> condFltCode LE  x y
1212
1213       MO_Eq _ -> condIntCode EQQ x y
1214       MO_Ne _ -> condIntCode NE  x y
1215
1216       MO_S_Gt _ -> condIntCode GTT x y
1217       MO_S_Ge _ -> condIntCode GE  x y
1218       MO_S_Lt _ -> condIntCode LTT x y
1219       MO_S_Le _ -> condIntCode LE  x y
1220
1221       MO_U_Gt _ -> condIntCode GU  x y
1222       MO_U_Ge _ -> condIntCode GEU x y
1223       MO_U_Lt _ -> condIntCode LU  x y
1224       MO_U_Le _ -> condIntCode LEU x y
1225
1226       _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1227
1228 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1229
1230
1231
1232
1233 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1234 -- passed back up the tree.
1235
1236 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1237
1238 -- memory vs immediate
1239 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1240     Amode x_addr x_code <- getAmode x
1241     let
1242         imm  = litToImm lit
1243         code = x_code `snocOL`
1244                   CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1245     --
1246     return (CondCode False cond code)
1247
1248 -- anything vs zero, using a mask
1249 -- TODO: Add some sanity checking!!!!
1250 condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
1251     | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit lit
1252     = do
1253       (x_reg, x_code) <- getSomeReg x
1254       let
1255          code = x_code `snocOL`
1256                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1257       --
1258       return (CondCode False cond code)
1259
1260 -- anything vs zero
1261 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1262     (x_reg, x_code) <- getSomeReg x
1263     let
1264         code = x_code `snocOL`
1265                   TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1266     --
1267     return (CondCode False cond code)
1268
1269 -- anything vs operand
1270 condIntCode cond x y | isOperand y = do
1271     (x_reg, x_code) <- getNonClobberedReg x
1272     (y_op,  y_code) <- getOperand y
1273     let
1274         code = x_code `appOL` y_code `snocOL`
1275                   CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1276     -- in
1277     return (CondCode False cond code)
1278
1279 -- anything vs anything
1280 condIntCode cond x y = do
1281   (y_reg, y_code) <- getNonClobberedReg y
1282   (x_op, x_code) <- getRegOrMem x
1283   let
1284         code = y_code `appOL`
1285                x_code `snocOL`
1286                   CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1287   -- in
1288   return (CondCode False cond code)
1289
1290
1291
1292 --------------------------------------------------------------------------------
1293 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1294
1295 condFltCode cond x y
1296   = if_sse2 condFltCode_sse2 condFltCode_x87
1297   where
1298
1299   condFltCode_x87
1300     = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1301     (x_reg, x_code) <- getNonClobberedReg x
1302     (y_reg, y_code) <- getSomeReg y
1303     let
1304         code = x_code `appOL` y_code `snocOL`
1305                 GCMP cond x_reg y_reg
1306     -- The GCMP insn does the test and sets the zero flag if comparable
1307     -- and true.  Hence we always supply EQQ as the condition to test.
1308     return (CondCode True EQQ code)
1309
1310   -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1311   -- an operand, but the right must be a reg.  We can probably do better
1312   -- than this general case...
1313   condFltCode_sse2 = do
1314     (x_reg, x_code) <- getNonClobberedReg x
1315     (y_op, y_code) <- getOperand y
1316     let
1317         code = x_code `appOL`
1318                y_code `snocOL`
1319                   CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1320         -- NB(1): we need to use the unsigned comparison operators on the
1321         -- result of this comparison.
1322     -- in
1323     return (CondCode True (condToUnsigned cond) code)
1324
1325 -- -----------------------------------------------------------------------------
1326 -- Generating assignments
1327
1328 -- Assignments are really at the heart of the whole code generation
1329 -- business.  Almost all top-level nodes of any real importance are
1330 -- assignments, which correspond to loads, stores, or register
1331 -- transfers.  If we're really lucky, some of the register transfers
1332 -- will go away, because we can use the destination register to
1333 -- complete the code generation for the right hand side.  This only
1334 -- fails when the right hand side is forced into a fixed register
1335 -- (e.g. the result of a call).
1336
1337 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1338 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
1339
1340 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1341 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
1342
1343
1344 -- integer assignment to memory
1345
1346 -- specific case of adding/subtracting an integer to a particular address.
1347 -- ToDo: catch other cases where we can use an operation directly on a memory
1348 -- address.
1349 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1350                                                  CmmLit (CmmInt i _)])
1351    | addr == addr2, pk /= II64 || is32BitInteger i,
1352      Just instr <- check op
1353    = do Amode amode code_addr <- getAmode addr
1354         let code = code_addr `snocOL`
1355                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1356         return code
1357    where
1358         check (MO_Add _) = Just ADD
1359         check (MO_Sub _) = Just SUB
1360         check _ = Nothing
1361         -- ToDo: more?
1362
1363 -- general case
1364 assignMem_IntCode pk addr src = do
1365     Amode addr code_addr <- getAmode addr
1366     (code_src, op_src)   <- get_op_RI src
1367     let
1368         code = code_src `appOL`
1369                code_addr `snocOL`
1370                   MOV pk op_src (OpAddr addr)
1371         -- NOTE: op_src is stable, so it will still be valid
1372         -- after code_addr.  This may involve the introduction
1373         -- of an extra MOV to a temporary register, but we hope
1374         -- the register allocator will get rid of it.
1375     --
1376     return code
1377   where
1378     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
1379     get_op_RI (CmmLit lit) | is32BitLit lit
1380       = return (nilOL, OpImm (litToImm lit))
1381     get_op_RI op
1382       = do (reg,code) <- getNonClobberedReg op
1383            return (code, OpReg reg)
1384
1385
1386 -- Assign; dst is a reg, rhs is mem
1387 assignReg_IntCode pk reg (CmmLoad src _) = do
1388   load_code <- intLoadCode (MOV pk) src
1389   return (load_code (getRegisterReg False{-no sse2-} reg))
1390
1391 -- dst is a reg, but src could be anything
1392 assignReg_IntCode _ reg src = do
1393   code <- getAnyReg src
1394   return (code (getRegisterReg False{-no sse2-} reg))
1395
1396
1397 -- Floating point assignment to memory
1398 assignMem_FltCode pk addr src = do
1399   (src_reg, src_code) <- getNonClobberedReg src
1400   Amode addr addr_code <- getAmode addr
1401   use_sse2 <- sse2Enabled
1402   let
1403         code = src_code `appOL`
1404                addr_code `snocOL`
1405                 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1406                             else GST pk src_reg addr
1407   return code
1408
1409 -- Floating point assignment to a register/temporary
1410 assignReg_FltCode _ reg src = do
1411   use_sse2 <- sse2Enabled
1412   src_code <- getAnyReg src
1413   return (src_code (getRegisterReg use_sse2 reg))
1414
1415
1416 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1417
1418 genJump (CmmLoad mem _) = do
1419   Amode target code <- getAmode mem
1420   return (code `snocOL` JMP (OpAddr target))
1421
1422 genJump (CmmLit lit) = do
1423   return (unitOL (JMP (OpImm (litToImm lit))))
1424
1425 genJump expr = do
1426   (reg,code) <- getSomeReg expr
1427   return (code `snocOL` JMP (OpReg reg))
1428
1429
1430 -- -----------------------------------------------------------------------------
1431 --  Unconditional branches
1432
1433 genBranch :: BlockId -> NatM InstrBlock
1434 genBranch = return . toOL . mkJumpInstr
1435
1436
1437
1438 -- -----------------------------------------------------------------------------
1439 --  Conditional jumps
1440
1441 {-
1442 Conditional jumps are always to local labels, so we can use branch
1443 instructions.  We peek at the arguments to decide what kind of
1444 comparison to do.
1445
1446 I386: First, we have to ensure that the condition
1447 codes are set according to the supplied comparison operation.
1448 -}
1449
1450 genCondJump
1451     :: BlockId      -- the branch target
1452     -> CmmExpr      -- the condition on which to branch
1453     -> NatM InstrBlock
1454
1455 genCondJump id bool = do
1456   CondCode is_float cond cond_code <- getCondCode bool
1457   use_sse2 <- sse2Enabled
1458   if not is_float || not use_sse2
1459     then
1460         return (cond_code `snocOL` JXX cond id)
1461     else do
1462         lbl <- getBlockIdNat
1463
1464         -- see comment with condFltReg
1465         let code = case cond of
1466                         NE  -> or_unordered
1467                         GU  -> plain_test
1468                         GEU -> plain_test
1469                         _   -> and_ordered
1470
1471             plain_test = unitOL (
1472                   JXX cond id
1473                 )
1474             or_unordered = toOL [
1475                   JXX cond id,
1476                   JXX PARITY id
1477                 ]
1478             and_ordered = toOL [
1479                   JXX PARITY lbl,
1480                   JXX cond id,
1481                   JXX ALWAYS lbl,
1482                   NEWBLOCK lbl
1483                 ]
1484         return (cond_code `appOL` code)
1485
1486
1487 -- -----------------------------------------------------------------------------
1488 --  Generating C calls
1489
1490 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
1491 -- @get_arg@, which moves the arguments to the correct registers/stack
1492 -- locations.  Apart from that, the code is easy.
1493 --
1494 -- (If applicable) Do not fill the delay slots here; you will confuse the
1495 -- register allocator.
1496
1497 genCCall
1498     :: CmmCallTarget            -- function to call
1499     -> HintedCmmFormals         -- where to put the result
1500     -> HintedCmmActuals         -- arguments (of mixed type)
1501     -> NatM InstrBlock
1502
1503 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1504
1505 #if i386_TARGET_ARCH
1506
1507 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1508         -- write barrier compiles to no code on x86/x86-64;
1509         -- we keep it this long in order to prevent earlier optimisations.
1510
1511 -- void return type prim op
1512 genCCall (CmmPrim op) [] args =
1513     outOfLineCmmOp op Nothing args
1514
1515 -- we only cope with a single result for foreign calls
1516 genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do
1517   l1 <- getNewLabelNat
1518   l2 <- getNewLabelNat
1519   sse2 <- sse2Enabled
1520   if sse2
1521     then
1522       outOfLineCmmOp op (Just r_hinted) args
1523     else case op of
1524         MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1525         MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1526
1527         MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1528         MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1529
1530         MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1531         MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1532
1533         MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1534         MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1535
1536         _other_op   -> outOfLineCmmOp op (Just r_hinted) args
1537
1538  where
1539   actuallyInlineFloatOp instr size [CmmHinted x _]
1540         = do res <- trivialUFCode size (instr size) x
1541              any <- anyReg res
1542              return (any (getRegisterReg False (CmmLocal r)))
1543
1544   actuallyInlineFloatOp _ _ args
1545         = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
1546                 ++ show (length args) ++ ")"
1547
1548 genCCall target dest_regs args = do
1549     let
1550         sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1551 #if !darwin_TARGET_OS
1552         tot_arg_size        = sum sizes
1553 #else
1554         raw_arg_size        = sum sizes
1555         tot_arg_size        = roundTo 16 raw_arg_size
1556         arg_pad_size        = tot_arg_size - raw_arg_size
1557     delta0 <- getDeltaNat
1558     setDeltaNat (delta0 - arg_pad_size)
1559 #endif
1560
1561     use_sse2 <- sse2Enabled
1562     push_codes <- mapM (push_arg use_sse2) (reverse args)
1563     delta <- getDeltaNat
1564
1565     -- in
1566     -- deal with static vs dynamic call targets
1567     (callinsns,cconv) <-
1568       case target of
1569         CmmCallee (CmmLit (CmmLabel lbl)) conv
1570            -> -- ToDo: stdcall arg sizes
1571               return (unitOL (CALL (Left fn_imm) []), conv)
1572            where fn_imm = ImmCLbl lbl
1573         CmmCallee expr conv
1574            -> do { (dyn_r, dyn_c) <- getSomeReg expr
1575                  ; ASSERT( isWord32 (cmmExprType expr) )
1576                    return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1577         CmmPrim _
1578             -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1579                         ++ "probably because too many return values."
1580
1581     let push_code
1582 #if darwin_TARGET_OS
1583             | arg_pad_size /= 0
1584             = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1585                     DELTA (delta0 - arg_pad_size)]
1586               `appOL` concatOL push_codes
1587             | otherwise
1588 #endif
1589             = concatOL push_codes
1590
1591           -- Deallocate parameters after call for ccall;
1592           -- but not for stdcall (callee does it)
1593           --
1594           -- We have to pop any stack padding we added
1595           -- on Darwin even if we are doing stdcall, though (#5052)
1596         pop_size | cconv /= StdCallConv = tot_arg_size
1597                  | otherwise
1598 #if darwin_TARGET_OS
1599                  = arg_pad_size
1600 #else
1601                  = 0
1602 #endif
1603
1604         call = callinsns `appOL`
1605                toOL (
1606                   (if pop_size==0 then [] else
1607                    [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
1608                   ++
1609                   [DELTA (delta + tot_arg_size)]
1610                )
1611     -- in
1612     setDeltaNat (delta + tot_arg_size)
1613
1614     let
1615         -- assign the results, if necessary
1616         assign_code []     = nilOL
1617         assign_code [CmmHinted dest _hint]
1618           | isFloatType ty =
1619              if use_sse2
1620                 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
1621                                                    EAIndexNone
1622                                                    (ImmInt 0)
1623                          sz = floatSize w
1624                      in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
1625                                GST sz fake0 tmp_amode,
1626                                MOV sz (OpAddr tmp_amode) (OpReg r_dest),
1627                                ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
1628                 else unitOL (GMOV fake0 r_dest)
1629           | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1630                                     MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1631           | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1632           where
1633                 ty = localRegType dest
1634                 w  = typeWidth ty
1635                 b  = widthInBytes w
1636                 r_dest_hi = getHiVRegFromLo r_dest
1637                 r_dest    = getRegisterReg use_sse2 (CmmLocal dest)
1638         assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
1639
1640     return (push_code `appOL`
1641             call `appOL`
1642             assign_code dest_regs)
1643
1644   where
1645     arg_size :: CmmType -> Int  -- Width in bytes
1646     arg_size ty = widthInBytes (typeWidth ty)
1647
1648 #if darwin_TARGET_OS
1649     roundTo a x | x `mod` a == 0 = x
1650                 | otherwise = x + a - (x `mod` a)
1651 #endif
1652
1653     push_arg :: Bool -> HintedCmmActual {-current argument-}
1654                     -> NatM InstrBlock  -- code
1655
1656     push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
1657       | isWord64 arg_ty = do
1658         ChildCode64 code r_lo <- iselExpr64 arg
1659         delta <- getDeltaNat
1660         setDeltaNat (delta - 8)
1661         let
1662             r_hi = getHiVRegFromLo r_lo
1663         -- in
1664         return (       code `appOL`
1665                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1666                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1667                              DELTA (delta-8)]
1668             )
1669
1670       | isFloatType arg_ty = do
1671         (reg, code) <- getSomeReg arg
1672         delta <- getDeltaNat
1673         setDeltaNat (delta-size)
1674         return (code `appOL`
1675                         toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1676                               DELTA (delta-size),
1677                               let addr = AddrBaseIndex (EABaseReg esp)
1678                                                         EAIndexNone
1679                                                         (ImmInt 0)
1680                                   size = floatSize (typeWidth arg_ty)
1681                               in
1682                               if use_sse2
1683                                  then MOV size (OpReg reg) (OpAddr addr)
1684                                  else GST size reg addr
1685                              ]
1686                        )
1687
1688       | otherwise = do
1689         (operand, code) <- getOperand arg
1690         delta <- getDeltaNat
1691         setDeltaNat (delta-size)
1692         return (code `snocOL`
1693                 PUSH II32 operand `snocOL`
1694                 DELTA (delta-size))
1695
1696       where
1697          arg_ty = cmmExprType arg
1698          size = arg_size arg_ty -- Byte size
1699
1700 #elif x86_64_TARGET_ARCH
1701
1702 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1703         -- write barrier compiles to no code on x86/x86-64;
1704         -- we keep it this long in order to prevent earlier optimisations.
1705
1706 -- void return type prim op
1707 genCCall (CmmPrim op) [] args =
1708   outOfLineCmmOp op Nothing args
1709
1710 -- we only cope with a single result for foreign calls
1711 genCCall (CmmPrim op) [res] args =
1712   outOfLineCmmOp op (Just res) args
1713
1714 genCCall target dest_regs args = do
1715
1716         -- load up the register arguments
1717     (stack_args, aregs, fregs, load_args_code)
1718          <- load_args args allArgRegs allFPArgRegs nilOL
1719
1720     let
1721         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
1722         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1723         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1724                 -- for annotating the call instruction with
1725
1726         sse_regs = length fp_regs_used
1727
1728         tot_arg_size = arg_size * length stack_args
1729
1730         -- On entry to the called function, %rsp should be aligned
1731         -- on a 16-byte boundary +8 (i.e. the first stack arg after
1732         -- the return address is 16-byte aligned).  In STG land
1733         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1734         -- need to make sure we push a multiple of 16-bytes of args,
1735         -- plus the return address, to get the correct alignment.
1736         -- Urg, this is hard.  We need to feed the delta back into
1737         -- the arg pushing code.
1738     (real_size, adjust_rsp) <-
1739         if tot_arg_size `rem` 16 == 0
1740             then return (tot_arg_size, nilOL)
1741             else do -- we need to adjust...
1742                 delta <- getDeltaNat
1743                 setDeltaNat (delta-8)
1744                 return (tot_arg_size+8, toOL [
1745                                 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1746                                 DELTA (delta-8)
1747                         ])
1748
1749         -- push the stack args, right to left
1750     push_code <- push_args (reverse stack_args) nilOL
1751     delta <- getDeltaNat
1752
1753     -- deal with static vs dynamic call targets
1754     (callinsns,cconv) <-
1755       case target of
1756         CmmCallee (CmmLit (CmmLabel lbl)) conv
1757            -> -- ToDo: stdcall arg sizes
1758               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1759            where fn_imm = ImmCLbl lbl
1760         CmmCallee expr conv
1761            -> do (dyn_r, dyn_c) <- getSomeReg expr
1762                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1763         CmmPrim _
1764             -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1765                         ++ "probably because too many return values."
1766
1767     let
1768         -- The x86_64 ABI requires us to set %al to the number of SSE2
1769         -- registers that contain arguments, if the called routine
1770         -- is a varargs function.  We don't know whether it's a
1771         -- varargs function or not, so we have to assume it is.
1772         --
1773         -- It's not safe to omit this assignment, even if the number
1774         -- of SSE2 regs in use is zero.  If %al is larger than 8
1775         -- on entry to a varargs function, seg faults ensue.
1776         assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1777
1778     let call = callinsns `appOL`
1779                toOL (
1780                         -- Deallocate parameters after call for ccall;
1781                         -- but not for stdcall (callee does it)
1782                   (if cconv == StdCallConv || real_size==0 then [] else
1783                    [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1784                   ++
1785                   [DELTA (delta + real_size)]
1786                )
1787     -- in
1788     setDeltaNat (delta + real_size)
1789
1790     let
1791         -- assign the results, if necessary
1792         assign_code []     = nilOL
1793         assign_code [CmmHinted dest _hint] =
1794           case typeWidth rep of
1795                 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1796                 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1797                 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1798           where
1799                 rep = localRegType dest
1800                 r_dest = getRegisterReg True (CmmLocal dest)
1801         assign_code many = panic "genCCall.assign_code many"
1802
1803     return (load_args_code      `appOL`
1804             adjust_rsp          `appOL`
1805             push_code           `appOL`
1806             assign_eax sse_regs `appOL`
1807             call                `appOL`
1808             assign_code dest_regs)
1809
1810   where
1811     arg_size = 8 -- always, at the mo
1812
1813     load_args :: [CmmHinted CmmExpr]
1814               -> [Reg]                  -- int regs avail for args
1815               -> [Reg]                  -- FP regs avail for args
1816               -> InstrBlock
1817               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1818     load_args args [] [] code     =  return (args, [], [], code)
1819         -- no more regs to use
1820     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
1821         -- no more args to push
1822     load_args ((CmmHinted arg hint) : rest) aregs fregs code
1823         | isFloatType arg_rep =
1824         case fregs of
1825           [] -> push_this_arg
1826           (r:rs) -> do
1827              arg_code <- getAnyReg arg
1828              load_args rest aregs rs (code `appOL` arg_code r)
1829         | otherwise =
1830         case aregs of
1831           [] -> push_this_arg
1832           (r:rs) -> do
1833              arg_code <- getAnyReg arg
1834              load_args rest rs fregs (code `appOL` arg_code r)
1835         where
1836           arg_rep = cmmExprType arg
1837
1838           push_this_arg = do
1839             (args',ars,frs,code') <- load_args rest aregs fregs code
1840             return ((CmmHinted arg hint):args', ars, frs, code')
1841
1842     push_args [] code = return code
1843     push_args ((CmmHinted arg hint):rest) code
1844        | isFloatType arg_rep = do
1845          (arg_reg, arg_code) <- getSomeReg arg
1846          delta <- getDeltaNat
1847          setDeltaNat (delta-arg_size)
1848          let code' = code `appOL` arg_code `appOL` toOL [
1849                         SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1850                         DELTA (delta-arg_size),
1851                         MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
1852          push_args rest code'
1853
1854        | otherwise = do
1855        -- we only ever generate word-sized function arguments.  Promotion
1856        -- has already happened: our Int8# type is kept sign-extended
1857        -- in an Int#, for example.
1858          ASSERT(width == W64) return ()
1859          (arg_op, arg_code) <- getOperand arg
1860          delta <- getDeltaNat
1861          setDeltaNat (delta-arg_size)
1862          let code' = code `appOL` arg_code `appOL` toOL [
1863                                 PUSH II64 arg_op,
1864                                 DELTA (delta-arg_size)]
1865          push_args rest code'
1866         where
1867           arg_rep = cmmExprType arg
1868           width = typeWidth arg_rep
1869
1870 #else
1871 genCCall        = panic "X86.genCCAll: not defined"
1872
1873 #endif /* x86_64_TARGET_ARCH */
1874
1875
1876 outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> HintedCmmActuals -> NatM InstrBlock
1877 outOfLineCmmOp mop res args
1878   = do
1879       dflags <- getDynFlagsNat
1880       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1881       let target = CmmCallee targetExpr CCallConv
1882
1883       stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
1884   where
1885         -- Assume we can call these functions directly, and that they're not in a dynamic library.
1886         -- TODO: Why is this ok? Under linux this code will be in libm.so
1887         --       Is is because they're really implemented as a primitive instruction by the assembler??  -- BL 2009/12/31
1888         lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
1889
1890         args' = case mop of
1891                     MO_Memcpy    -> init args
1892                     MO_Memset    -> init args
1893                     MO_Memmove   -> init args
1894                     _            -> args
1895
1896         fn = case mop of
1897               MO_F32_Sqrt  -> fsLit "sqrtf"
1898               MO_F32_Sin   -> fsLit "sinf"
1899               MO_F32_Cos   -> fsLit "cosf"
1900               MO_F32_Tan   -> fsLit "tanf"
1901               MO_F32_Exp   -> fsLit "expf"
1902               MO_F32_Log   -> fsLit "logf"
1903
1904               MO_F32_Asin  -> fsLit "asinf"
1905               MO_F32_Acos  -> fsLit "acosf"
1906               MO_F32_Atan  -> fsLit "atanf"
1907
1908               MO_F32_Sinh  -> fsLit "sinhf"
1909               MO_F32_Cosh  -> fsLit "coshf"
1910               MO_F32_Tanh  -> fsLit "tanhf"
1911               MO_F32_Pwr   -> fsLit "powf"
1912
1913               MO_F64_Sqrt  -> fsLit "sqrt"
1914               MO_F64_Sin   -> fsLit "sin"
1915               MO_F64_Cos   -> fsLit "cos"
1916               MO_F64_Tan   -> fsLit "tan"
1917               MO_F64_Exp   -> fsLit "exp"
1918               MO_F64_Log   -> fsLit "log"
1919
1920               MO_F64_Asin  -> fsLit "asin"
1921               MO_F64_Acos  -> fsLit "acos"
1922               MO_F64_Atan  -> fsLit "atan"
1923
1924               MO_F64_Sinh  -> fsLit "sinh"
1925               MO_F64_Cosh  -> fsLit "cosh"
1926               MO_F64_Tanh  -> fsLit "tanh"
1927               MO_F64_Pwr   -> fsLit "pow"
1928
1929               MO_Memcpy    -> fsLit "memcpy"
1930               MO_Memset    -> fsLit "memset"
1931               MO_Memmove   -> fsLit "memmove"
1932
1933               other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
1934
1935
1936 -- -----------------------------------------------------------------------------
1937 -- Generating a table-branch
1938
1939 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1940
1941 genSwitch expr ids
1942   | opt_PIC
1943   = do
1944         (reg,e_code) <- getSomeReg expr
1945         lbl <- getNewLabelNat
1946         dflags <- getDynFlagsNat
1947         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1948         (tableReg,t_code) <- getSomeReg $ dynRef
1949         let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1950                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
1951
1952 #if x86_64_TARGET_ARCH
1953 #if darwin_TARGET_OS
1954     -- on Mac OS X/x86_64, put the jump table in the text section
1955     -- to work around a limitation of the linker.
1956     -- ld64 is unable to handle the relocations for
1957     --     .quad L1 - L0
1958     -- if L0 is not preceded by a non-anonymous label in its section.
1959
1960             code = e_code `appOL` t_code `appOL` toOL [
1961                             ADD (intSize wordWidth) op (OpReg tableReg),
1962                             JMP_TBL (OpReg tableReg) ids Text lbl
1963                     ]
1964 #else
1965     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1966     -- relocations, hence we only get 32-bit offsets in the jump
1967     -- table. As these offsets are always negative we need to properly
1968     -- sign extend them to 64-bit. This hack should be removed in
1969     -- conjunction with the hack in PprMach.hs/pprDataItem once
1970     -- binutils 2.17 is standard.
1971             code = e_code `appOL` t_code `appOL` toOL [
1972                             MOVSxL II32
1973                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1974                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
1975                                    (OpReg reg),
1976                             ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1977                             JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1978                    ]
1979 #endif
1980 #else
1981             code = e_code `appOL` t_code `appOL` toOL [
1982                             ADD (intSize wordWidth) op (OpReg tableReg),
1983                             JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1984                     ]
1985 #endif
1986         return code
1987   | otherwise
1988   = do
1989         (reg,e_code) <- getSomeReg expr
1990         lbl <- getNewLabelNat
1991         let
1992             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1993             code = e_code `appOL` toOL [
1994                     JMP_TBL op ids ReadOnlyData lbl
1995                  ]
1996         -- in
1997         return code
1998
1999 generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
2000 generateJumpTableForInstr (JMP_TBL _ ids section lbl) = Just (createJumpTable ids section lbl)
2001 generateJumpTableForInstr _ = Nothing
2002
2003 createJumpTable :: [Maybe BlockId] -> Section -> CLabel -> GenCmmTop CmmStatic h g
2004 createJumpTable ids section lbl
2005     = let jumpTable
2006             | opt_PIC =
2007                   let jumpTableEntryRel Nothing
2008                           = CmmStaticLit (CmmInt 0 wordWidth)
2009                       jumpTableEntryRel (Just blockid)
2010                           = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2011                           where blockLabel = mkAsmTempLabel (getUnique blockid)
2012                   in map jumpTableEntryRel ids
2013             | otherwise = map jumpTableEntry ids
2014       in CmmData section (CmmDataLabel lbl : jumpTable)
2015
2016 -- -----------------------------------------------------------------------------
2017 -- 'condIntReg' and 'condFltReg': condition codes into registers
2018
2019 -- Turn those condition codes into integers now (when they appear on
2020 -- the right hand side of an assignment).
2021 --
2022 -- (If applicable) Do not fill the delay slots here; you will confuse the
2023 -- register allocator.
2024
2025 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2026
2027 condIntReg cond x y = do
2028   CondCode _ cond cond_code <- condIntCode cond x y
2029   tmp <- getNewRegNat II8
2030   let
2031         code dst = cond_code `appOL` toOL [
2032                     SETCC cond (OpReg tmp),
2033                     MOVZxL II8 (OpReg tmp) (OpReg dst)
2034                   ]
2035   -- in
2036   return (Any II32 code)
2037
2038
2039
2040 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2041 condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2042  where
2043   condFltReg_x87 = do
2044     CondCode _ cond cond_code <- condFltCode cond x y
2045     tmp <- getNewRegNat II8
2046     let
2047         code dst = cond_code `appOL` toOL [
2048                     SETCC cond (OpReg tmp),
2049                     MOVZxL II8 (OpReg tmp) (OpReg dst)
2050                   ]
2051     -- in
2052     return (Any II32 code)
2053
2054   condFltReg_sse2 = do
2055     CondCode _ cond cond_code <- condFltCode cond x y
2056     tmp1 <- getNewRegNat archWordSize
2057     tmp2 <- getNewRegNat archWordSize
2058     let
2059         -- We have to worry about unordered operands (eg. comparisons
2060         -- against NaN).  If the operands are unordered, the comparison
2061         -- sets the parity flag, carry flag and zero flag.
2062         -- All comparisons are supposed to return false for unordered
2063         -- operands except for !=, which returns true.
2064         --
2065         -- Optimisation: we don't have to test the parity flag if we
2066         -- know the test has already excluded the unordered case: eg >
2067         -- and >= test for a zero carry flag, which can only occur for
2068         -- ordered operands.
2069         --
2070         -- ToDo: by reversing comparisons we could avoid testing the
2071         -- parity flag in more cases.
2072
2073         code dst =
2074            cond_code `appOL`
2075              (case cond of
2076                 NE  -> or_unordered dst
2077                 GU  -> plain_test   dst
2078                 GEU -> plain_test   dst
2079                 _   -> and_ordered  dst)
2080
2081         plain_test dst = toOL [
2082                     SETCC cond (OpReg tmp1),
2083                     MOVZxL II8 (OpReg tmp1) (OpReg dst)
2084                  ]
2085         or_unordered dst = toOL [
2086                     SETCC cond (OpReg tmp1),
2087                     SETCC PARITY (OpReg tmp2),
2088                     OR II8 (OpReg tmp1) (OpReg tmp2),
2089                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
2090                   ]
2091         and_ordered dst = toOL [
2092                     SETCC cond (OpReg tmp1),
2093                     SETCC NOTPARITY (OpReg tmp2),
2094                     AND II8 (OpReg tmp1) (OpReg tmp2),
2095                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
2096                   ]
2097     -- in
2098     return (Any II32 code)
2099
2100
2101 -- -----------------------------------------------------------------------------
2102 -- 'trivial*Code': deal with trivial instructions
2103
2104 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2105 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2106 -- Only look for constants on the right hand side, because that's
2107 -- where the generic optimizer will have put them.
2108
2109 -- Similarly, for unary instructions, we don't have to worry about
2110 -- matching an StInt as the argument, because genericOpt will already
2111 -- have handled the constant-folding.
2112
2113
2114 {-
2115 The Rules of the Game are:
2116
2117 * You cannot assume anything about the destination register dst;
2118   it may be anything, including a fixed reg.
2119
2120 * You may compute an operand into a fixed reg, but you may not
2121   subsequently change the contents of that fixed reg.  If you
2122   want to do so, first copy the value either to a temporary
2123   or into dst.  You are free to modify dst even if it happens
2124   to be a fixed reg -- that's not your problem.
2125
2126 * You cannot assume that a fixed reg will stay live over an
2127   arbitrary computation.  The same applies to the dst reg.
2128
2129 * Temporary regs obtained from getNewRegNat are distinct from
2130   each other and from all other regs, and stay live over
2131   arbitrary computations.
2132
2133 --------------------
2134
2135 SDM's version of The Rules:
2136
2137 * If getRegister returns Any, that means it can generate correct
2138   code which places the result in any register, period.  Even if that
2139   register happens to be read during the computation.
2140
2141   Corollary #1: this means that if you are generating code for an
2142   operation with two arbitrary operands, you cannot assign the result
2143   of the first operand into the destination register before computing
2144   the second operand.  The second operand might require the old value
2145   of the destination register.
2146
2147   Corollary #2: A function might be able to generate more efficient
2148   code if it knows the destination register is a new temporary (and
2149   therefore not read by any of the sub-computations).
2150
2151 * If getRegister returns Any, then the code it generates may modify only:
2152         (a) fresh temporaries
2153         (b) the destination register
2154         (c) known registers (eg. %ecx is used by shifts)
2155   In particular, it may *not* modify global registers, unless the global
2156   register happens to be the destination register.
2157 -}
2158
2159 trivialCode :: Width -> (Operand -> Operand -> Instr)
2160             -> Maybe (Operand -> Operand -> Instr)
2161             -> CmmExpr -> CmmExpr -> NatM Register
2162 trivialCode width _ (Just revinstr) (CmmLit lit_a) b
2163   | is32BitLit lit_a = do
2164   b_code <- getAnyReg b
2165   let
2166        code dst
2167          = b_code dst `snocOL`
2168            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2169   -- in
2170   return (Any (intSize width) code)
2171
2172 trivialCode width instr _ a b
2173   = genTrivialCode (intSize width) instr a b
2174
2175 -- This is re-used for floating pt instructions too.
2176 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2177                -> CmmExpr -> CmmExpr -> NatM Register
2178 genTrivialCode rep instr a b = do
2179   (b_op, b_code) <- getNonClobberedOperand b
2180   a_code <- getAnyReg a
2181   tmp <- getNewRegNat rep
2182   let
2183      -- We want the value of b to stay alive across the computation of a.
2184      -- But, we want to calculate a straight into the destination register,
2185      -- because the instruction only has two operands (dst := dst `op` src).
2186      -- The troublesome case is when the result of b is in the same register
2187      -- as the destination reg.  In this case, we have to save b in a
2188      -- new temporary across the computation of a.
2189      code dst
2190         | dst `regClashesWithOp` b_op =
2191                 b_code `appOL`
2192                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2193                 a_code dst `snocOL`
2194                 instr (OpReg tmp) (OpReg dst)
2195         | otherwise =
2196                 b_code `appOL`
2197                 a_code dst `snocOL`
2198                 instr b_op (OpReg dst)
2199   -- in
2200   return (Any rep code)
2201
2202 regClashesWithOp :: Reg -> Operand -> Bool
2203 reg `regClashesWithOp` OpReg reg2   = reg == reg2
2204 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2205 _   `regClashesWithOp` _            = False
2206
2207 -----------
2208
2209 trivialUCode :: Size -> (Operand -> Instr)
2210              -> CmmExpr -> NatM Register
2211 trivialUCode rep instr x = do
2212   x_code <- getAnyReg x
2213   let
2214      code dst =
2215         x_code dst `snocOL`
2216         instr (OpReg dst)
2217   return (Any rep code)
2218
2219 -----------
2220
2221 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2222                  -> CmmExpr -> CmmExpr -> NatM Register
2223 trivialFCode_x87 instr x y = do
2224   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2225   (y_reg, y_code) <- getSomeReg y
2226   let
2227      size = FF80 -- always, on x87
2228      code dst =
2229         x_code `appOL`
2230         y_code `snocOL`
2231         instr size x_reg y_reg dst
2232   return (Any size code)
2233
2234 trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
2235                   -> CmmExpr -> CmmExpr -> NatM Register
2236 trivialFCode_sse2 pk instr x y
2237     = genTrivialCode size (instr size) x y
2238     where size = floatSize pk
2239
2240
2241 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2242 trivialUFCode size instr x = do
2243   (x_reg, x_code) <- getSomeReg x
2244   let
2245      code dst =
2246         x_code `snocOL`
2247         instr x_reg dst
2248   -- in
2249   return (Any size code)
2250
2251
2252 --------------------------------------------------------------------------------
2253 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2254 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2255  where
2256    coerce_x87 = do
2257      (x_reg, x_code) <- getSomeReg x
2258      let
2259            opc  = case to of W32 -> GITOF; W64 -> GITOD;
2260                              n -> panic $ "coerceInt2FP.x87: unhandled width ("
2261                                          ++ show n ++ ")"
2262            code dst = x_code `snocOL` opc x_reg dst
2263         -- ToDo: works for non-II32 reps?
2264      return (Any FF80 code)
2265
2266    coerce_sse2 = do
2267      (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
2268      let
2269            opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2270                              n -> panic $ "coerceInt2FP.sse: unhandled width ("
2271                                          ++ show n ++ ")"
2272            code dst = x_code `snocOL` opc (intSize from) x_op dst
2273      -- in
2274      return (Any (floatSize to) code)
2275         -- works even if the destination rep is <II32
2276
2277 --------------------------------------------------------------------------------
2278 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2279 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2280  where
2281    coerceFP2Int_x87 = do
2282      (x_reg, x_code) <- getSomeReg x
2283      let
2284            opc  = case from of W32 -> GFTOI; W64 -> GDTOI
2285                                n -> panic $ "coerceFP2Int.x87: unhandled width ("
2286                                            ++ show n ++ ")"
2287            code dst = x_code `snocOL` opc x_reg dst
2288         -- ToDo: works for non-II32 reps?
2289      -- in
2290      return (Any (intSize to) code)
2291
2292    coerceFP2Int_sse2 = do
2293      (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
2294      let
2295            opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2296                                n -> panic $ "coerceFP2Init.sse: unhandled width ("
2297                                            ++ show n ++ ")"
2298            code dst = x_code `snocOL` opc (intSize to) x_op dst
2299      -- in
2300      return (Any (intSize to) code)
2301          -- works even if the destination rep is <II32
2302
2303
2304 --------------------------------------------------------------------------------
2305 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2306 coerceFP2FP to x = do
2307   use_sse2 <- sse2Enabled
2308   (x_reg, x_code) <- getSomeReg x
2309   let
2310         opc | use_sse2  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2311                                      n -> panic $ "coerceFP2FP: unhandled width ("
2312                                                  ++ show n ++ ")"
2313             | otherwise = GDTOF
2314         code dst = x_code `snocOL` opc x_reg dst
2315   -- in
2316   return (Any (if use_sse2 then floatSize to else FF80) code)
2317
2318 --------------------------------------------------------------------------------
2319
2320 sse2NegCode :: Width -> CmmExpr -> NatM Register
2321 sse2NegCode w x = do
2322   let sz = floatSize w
2323   x_code <- getAnyReg x
2324   -- This is how gcc does it, so it can't be that bad:
2325   let
2326     const | FF32 <- sz = CmmInt 0x80000000 W32
2327           | otherwise  = CmmInt 0x8000000000000000 W64
2328   Amode amode amode_code <- memConstant (widthInBytes w) const
2329   tmp <- getNewRegNat sz
2330   let
2331     code dst = x_code dst `appOL` amode_code `appOL` toOL [
2332         MOV sz (OpAddr amode) (OpReg tmp),
2333         XOR sz (OpReg tmp) (OpReg dst)
2334         ]
2335   --
2336   return (Any sz code)