compiler/nativeGen/X86/CodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module X86.CodeGen (
  22         cmmTopCodeGen,
  23         InstrBlock
  24 )
  25
  26 where
  27
  28 #include "HsVersions.h"
  29 #include "nativeGen/NCG.h"
  30 #include "MachDeps.h"
  31
  32 -- NCG stuff:
  33 import X86.Instr
  34 import X86.Cond
  35 import X86.Regs
  36 import X86.RegInfo
  37 import X86.Ppr
  38 import Instruction
  39 import PIC
  40 import NCGMonad
  41 import Size
  42 import Reg
  43 import RegClass
  44 import Platform
  45
  46 -- Our intermediate code:
  47 import BasicTypes
  48 import BlockId
  49 import PprCmm           ( pprExpr )
  50 import Cmm
  51 import CLabel
  52 import ClosureInfo      ( C_SRT(..) )
  53
  54 -- The rest:
  55 import StaticFlags      ( opt_PIC )
  56 import ForeignCall      ( CCallConv(..) )
  57 import OrdList
  58 import Pretty
  59 import qualified Outputable as O
  60 import Outputable
  61 import FastString
  62 import FastBool         ( isFastTrue )
  63 import Constants        ( wORD_SIZE )
  64 import DynFlags
  65
  66 import Debug.Trace      ( trace )
  67
  68 import Control.Monad    ( mapAndUnzipM )
  69 import Data.Maybe       ( fromJust )
  70 import Data.Bits
  71 import Data.Word
  72 import Data.Int
  73
  74
  75 cmmTopCodeGen
  76         :: DynFlags
  77         -> RawCmmTop
  78         -> NatM [NatCmmTop Instr]
  79
  80 cmmTopCodeGen dynflags
  81         (CmmProc info lab params (ListGraph blocks)) = do
  82   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  83   picBaseMb <- getPicBaseMaybeNat
  84   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  85       tops = proc : concat statics
  86       os   = platformOS $ targetPlatform dynflags
  87
  88   case picBaseMb of
  89       Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
  90       Nothing -> return tops
  91
  92 cmmTopCodeGen _ (CmmData sec dat) = do
  93   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  94
  95
  96 basicBlockCodeGen
  97         :: CmmBasicBlock
  98         -> NatM ( [NatBasicBlock Instr]
  99                 , [NatCmmTop Instr])
 100
 101 basicBlockCodeGen (BasicBlock id stmts) = do
 102   instrs <- stmtsToInstrs stmts
 103   -- code generation may introduce new basic block boundaries, which
 104   -- are indicated by the NEWBLOCK instruction.  We must split up the
 105   -- instruction stream into basic blocks again.  Also, we extract
 106   -- LDATAs here too.
 107   let
 108         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 109
 110         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 111           = ([], BasicBlock id instrs : blocks, statics)
 112         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 113           = (instrs, blocks, CmmData sec dat:statics)
 114         mkBlocks instr (instrs,blocks,statics)
 115           = (instr:instrs, blocks, statics)
 116   -- in
 117   return (BasicBlock id top : other_blocks, statics)
 118
 119
 120 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 121 stmtsToInstrs stmts
 122    = do instrss <- mapM stmtToInstrs stmts
 123         return (concatOL instrss)
 124
 125
 126 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 127 stmtToInstrs stmt = case stmt of
 128     CmmNop         -> return nilOL
 129     CmmComment s   -> return (unitOL (COMMENT s))
 130
 131     CmmAssign reg src
 132       | isFloatType ty -> assignReg_FltCode size reg src
 133 #if WORD_SIZE_IN_BITS==32
 134       | isWord64 ty    -> assignReg_I64Code      reg src
 135 #endif
 136       | otherwise        -> assignReg_IntCode size reg src
 137         where ty = cmmRegType reg
 138               size = cmmTypeSize ty
 139
 140     CmmStore addr src
 141       | isFloatType ty -> assignMem_FltCode size addr src
 142 #if WORD_SIZE_IN_BITS==32
 143       | isWord64 ty      -> assignMem_I64Code      addr src
 144 #endif
 145       | otherwise        -> assignMem_IntCode size addr src
 146         where ty = cmmExprType src
 147               size = cmmTypeSize ty
 148
 149     CmmCall target result_regs args _ _
 150        -> genCCall target result_regs args
 151
 152     CmmBranch id          -> genBranch id
 153     CmmCondBranch arg id  -> genCondJump id arg
 154     CmmSwitch arg ids     -> genSwitch arg ids
 155     CmmJump arg params    -> genJump arg
 156     CmmReturn params      ->
 157       panic "stmtToInstrs: return statement should have been cps'd away"
 158
 159
 160 --------------------------------------------------------------------------------
 161 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 162 --      They are really trees of insns to facilitate fast appending, where a
 163 --      left-to-right traversal yields the insns in the correct order.
 164 --
 165 type InstrBlock
 166         = OrdList Instr
 167
 168
 169 -- | Condition codes passed up the tree.
 170 --
 171 data CondCode
 172         = CondCode Bool Cond InstrBlock
 173
 174
 175 -- | a.k.a "Register64"
 176 --      Reg is the lower 32-bit temporary which contains the result.
 177 --      Use getHiVRegFromLo to find the other VRegUnique.
 178 --
 179 --      Rules of this simplified insn selection game are therefore that
 180 --      the returned Reg may be modified
 181 --
 182 data ChildCode64
 183    = ChildCode64
 184         InstrBlock
 185         Reg
 186
 187
 188 -- | Register's passed up the tree.  If the stix code forces the register
 189 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 190 --      otherwise, it comes out as @Any@, and the parent can decide which
 191 --      register to put it in.
 192 --
 193 data Register
 194         = Fixed Size Reg InstrBlock
 195         | Any   Size (Reg -> InstrBlock)
 196
 197
 198 swizzleRegisterRep :: Register -> Size -> Register
 199 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 200 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 201
 202
 203 -- | Grab the Reg for a CmmReg
 204 getRegisterReg :: CmmReg -> Reg
 205
 206 getRegisterReg (CmmLocal (LocalReg u pk))
 207   = mkVReg u (cmmTypeSize pk)
 208
 209 getRegisterReg (CmmGlobal mid)
 210   = case get_GlobalReg_reg_or_addr mid of
 211        Left reg@(RegReal _) -> reg
 212        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 213           -- By this stage, the only MagicIds remaining should be the
 214           -- ones which map to a real machine register on this
 215           -- platform.  Hence ...
 216
 217
 218 -- | Memory addressing modes passed up the tree.
 219 data Amode
 220         = Amode AddrMode InstrBlock
 221
 222 {-
 223 Now, given a tree (the argument to an CmmLoad) that references memory,
 224 produce a suitable addressing mode.
 225
 226 A Rule of the Game (tm) for Amodes: use of the addr bit must
 227 immediately follow use of the code part, since the code part puts
 228 values in registers which the addr then refers to.  So you can't put
 229 anything in between, lest it overwrite some of those registers.  If
 230 you need to do some other computation between the code part and use of
 231 the addr bit, first store the effective address from the amode in a
 232 temporary, then do the other computation, and then use the temporary:
 233
 234     code
 235     LEA amode, tmp
 236     ... other computation ...
 237     ... (tmp) ...
 238 -}
 239
 240
 241 -- | Check whether an integer will fit in 32 bits.
 242 --      A CmmInt is intended to be truncated to the appropriate
 243 --      number of bits, so here we truncate it to Int64.  This is
 244 --      important because e.g. -1 as a CmmInt might be either
 245 --      -1 or 18446744073709551615.
 246 --
 247 is32BitInteger :: Integer -> Bool
 248 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 249   where i64 = fromIntegral i :: Int64
 250
 251
 252 -- | Convert a BlockId to some CmmStatic data
 253 jumpTableEntry :: Maybe BlockId -> CmmStatic
 254 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 255 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
 256     where blockLabel = mkAsmTempLabel id
 257
 258
 259 -- -----------------------------------------------------------------------------
 260 -- General things for putting together code sequences
 261
 262 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 263 -- CmmExprs into CmmRegOff?
 264 mangleIndexTree :: CmmExpr -> CmmExpr
 265 mangleIndexTree (CmmRegOff reg off)
 266   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 267   where width = typeWidth (cmmRegType reg)
 268
 269 -- | The dual to getAnyReg: compute an expression into a register, but
 270 --      we don't mind which one it is.
 271 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 272 getSomeReg expr = do
 273   r <- getRegister expr
 274   case r of
 275     Any rep code -> do
 276         tmp <- getNewRegNat rep
 277         return (tmp, code tmp)
 278     Fixed _ reg code ->
 279         return (reg, code)
 280
 281
 282
 283
 284
 285 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 286 assignMem_I64Code addrTree valueTree = do
 287   Amode addr addr_code <- getAmode addrTree
 288   ChildCode64 vcode rlo <- iselExpr64 valueTree
 289   let
 290         rhi = getHiVRegFromLo rlo
 291
 292         -- Little-endian store
 293         mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
 294         mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 295   -- in
 296   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 297
 298
 299 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 300 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 301    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 302    let
 303          r_dst_lo = mkVReg u_dst II32
 304          r_dst_hi = getHiVRegFromLo r_dst_lo
 305          r_src_hi = getHiVRegFromLo r_src_lo
 306          mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
 307          mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
 308    -- in
 309    return (
 310         vcode `snocOL` mov_lo `snocOL` mov_hi
 311      )
 312
 313 assignReg_I64Code lvalue valueTree
 314    = panic "assignReg_I64Code(i386): invalid lvalue"
 315
 316
 317
 318
 319 iselExpr64        :: CmmExpr -> NatM ChildCode64
 320 iselExpr64 (CmmLit (CmmInt i _)) = do
 321   (rlo,rhi) <- getNewRegPairNat II32
 322   let
 323         r = fromIntegral (fromIntegral i :: Word32)
 324         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 325         code = toOL [
 326                 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
 327                 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
 328                 ]
 329   -- in
 330   return (ChildCode64 code rlo)
 331
 332 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 333    Amode addr addr_code <- getAmode addrTree
 334    (rlo,rhi) <- getNewRegPairNat II32
 335    let
 336         mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
 337         mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 338    -- in
 339    return (
 340             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 341                         rlo
 342      )
 343
 344 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 345    = return (ChildCode64 nilOL (mkVReg vu II32))
 346
 347 -- we handle addition, but rather badly
 348 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 349    ChildCode64 code1 r1lo <- iselExpr64 e1
 350    (rlo,rhi) <- getNewRegPairNat II32
 351    let
 352         r = fromIntegral (fromIntegral i :: Word32)
 353         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 354         r1hi = getHiVRegFromLo r1lo
 355         code =  code1 `appOL`
 356                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 357                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
 358                        MOV II32 (OpReg r1hi) (OpReg rhi),
 359                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 360    -- in
 361    return (ChildCode64 code rlo)
 362
 363 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 364    ChildCode64 code1 r1lo <- iselExpr64 e1
 365    ChildCode64 code2 r2lo <- iselExpr64 e2
 366    (rlo,rhi) <- getNewRegPairNat II32
 367    let
 368         r1hi = getHiVRegFromLo r1lo
 369         r2hi = getHiVRegFromLo r2lo
 370         code =  code1 `appOL`
 371                 code2 `appOL`
 372                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 373                        ADD II32 (OpReg r2lo) (OpReg rlo),
 374                        MOV II32 (OpReg r1hi) (OpReg rhi),
 375                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
 376    -- in
 377    return (ChildCode64 code rlo)
 378
 379 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
 380      fn <- getAnyReg expr
 381      r_dst_lo <-  getNewRegNat II32
 382      let r_dst_hi = getHiVRegFromLo r_dst_lo
 383          code = fn r_dst_lo
 384      return (
 385              ChildCode64 (code `snocOL`
 386                           MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 387                           r_dst_lo
 388             )
 389
 390 iselExpr64 expr
 391    = pprPanic "iselExpr64(i386)" (ppr expr)
 392
 393
 394
 395 --------------------------------------------------------------------------------
 396 getRegister :: CmmExpr -> NatM Register
 397
 398 #if !x86_64_TARGET_ARCH
 399     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 400     -- register, it can only be used for rip-relative addressing.
 401 getRegister (CmmReg (CmmGlobal PicBaseReg))
 402   = do
 403       reg <- getPicBaseNat archWordSize
 404       return (Fixed archWordSize reg nilOL)
 405 #endif
 406
 407 getRegister (CmmReg reg)
 408   = return (Fixed (cmmTypeSize (cmmRegType reg))
 409                   (getRegisterReg reg) nilOL)
 410
 411 getRegister tree@(CmmRegOff _ _)
 412   = getRegister (mangleIndexTree tree)
 413
 414
 415 #if WORD_SIZE_IN_BITS==32
 416     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 417     -- TO_W_(x), TO_W_(x >> 32)
 418
 419 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 420              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 421   ChildCode64 code rlo <- iselExpr64 x
 422   return $ Fixed II32 (getHiVRegFromLo rlo) code
 423
 424 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 425              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 426   ChildCode64 code rlo <- iselExpr64 x
 427   return $ Fixed II32 (getHiVRegFromLo rlo) code
 428
 429 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 430   ChildCode64 code rlo <- iselExpr64 x
 431   return $ Fixed II32 rlo code
 432
 433 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 434   ChildCode64 code rlo <- iselExpr64 x
 435   return $ Fixed II32 rlo code
 436
 437 #endif
 438
 439
 440
 441
 442 #if i386_TARGET_ARCH
 443
 444 getRegister (CmmLit (CmmFloat f W32)) = do
 445     lbl <- getNewLabelNat
 446     dflags <- getDynFlagsNat
 447     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 448     Amode addr addr_code <- getAmode dynRef
 449     let code dst =
 450             LDATA ReadOnlyData
 451                         [CmmDataLabel lbl,
 452                          CmmStaticLit (CmmFloat f W32)]
 453             `consOL` (addr_code `snocOL`
 454             GLD FF32 addr dst)
 455     -- in
 456     return (Any FF32 code)
 457
 458
 459 getRegister (CmmLit (CmmFloat d W64))
 460   | d == 0.0
 461   = let code dst = unitOL (GLDZ dst)
 462     in  return (Any FF64 code)
 463
 464   | d == 1.0
 465   = let code dst = unitOL (GLD1 dst)
 466     in  return (Any FF64 code)
 467
 468   | otherwise = do
 469     lbl <- getNewLabelNat
 470     dflags <- getDynFlagsNat
 471     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 472     Amode addr addr_code <- getAmode dynRef
 473     let code dst =
 474             LDATA ReadOnlyData
 475                         [CmmDataLabel lbl,
 476                          CmmStaticLit (CmmFloat d W64)]
 477             `consOL` (addr_code `snocOL`
 478             GLD FF64 addr dst)
 479     -- in
 480     return (Any FF64 code)
 481
 482 #endif /* i386_TARGET_ARCH */
 483
 484
 485
 486
 487 #if x86_64_TARGET_ARCH
 488 getRegister (CmmLit (CmmFloat 0.0 w)) = do
 489    let size = floatSize w
 490        code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
 491         -- I don't know why there are xorpd, xorps, and pxor instructions.
 492         -- They all appear to do the same thing --SDM
 493    return (Any size code)
 494
 495 getRegister (CmmLit (CmmFloat f w)) = do
 496     lbl <- getNewLabelNat
 497     let code dst = toOL [
 498             LDATA ReadOnlyData
 499                         [CmmDataLabel lbl,
 500                          CmmStaticLit (CmmFloat f w)],
 501             MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 502             ]
 503     -- in
 504     return (Any size code)
 505   where size = floatSize w
 506
 507 #endif /* x86_64_TARGET_ARCH */
 508
 509
 510
 511
 512
 513 -- catch simple cases of zero- or sign-extended load
 514 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
 515   code <- intLoadCode (MOVZxL II8) addr
 516   return (Any II32 code)
 517
 518 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
 519   code <- intLoadCode (MOVSxL II8) addr
 520   return (Any II32 code)
 521
 522 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
 523   code <- intLoadCode (MOVZxL II16) addr
 524   return (Any II32 code)
 525
 526 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
 527   code <- intLoadCode (MOVSxL II16) addr
 528   return (Any II32 code)
 529
 530
 531 #if x86_64_TARGET_ARCH
 532
 533 -- catch simple cases of zero- or sign-extended load
 534 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
 535   code <- intLoadCode (MOVZxL II8) addr
 536   return (Any II64 code)
 537
 538 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
 539   code <- intLoadCode (MOVSxL II8) addr
 540   return (Any II64 code)
 541
 542 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
 543   code <- intLoadCode (MOVZxL II16) addr
 544   return (Any II64 code)
 545
 546 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
 547   code <- intLoadCode (MOVSxL II16) addr
 548   return (Any II64 code)
 549
 550 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
 551   code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
 552   return (Any II64 code)
 553
 554 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
 555   code <- intLoadCode (MOVSxL II32) addr
 556   return (Any II64 code)
 557
 558 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 559                                      CmmLit displacement])
 560     = return $ Any II64 (\dst -> unitOL $
 561         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 562
 563 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
 564   x_code <- getAnyReg x
 565   lbl <- getNewLabelNat
 566   let
 567     code dst = x_code dst `appOL` toOL [
 568         -- This is how gcc does it, so it can't be that bad:
 569         LDATA ReadOnlyData16 [
 570                 CmmAlign 16,
 571                 CmmDataLabel lbl,
 572                 CmmStaticLit (CmmInt 0x80000000 W32),
 573                 CmmStaticLit (CmmInt 0 W32),
 574                 CmmStaticLit (CmmInt 0 W32),
 575                 CmmStaticLit (CmmInt 0 W32)
 576         ],
 577         XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 578                 -- xorps, so we need the 128-bit constant
 579                 -- ToDo: rip-relative
 580         ]
 581   --
 582   return (Any FF32 code)
 583
 584 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
 585   x_code <- getAnyReg x
 586   lbl <- getNewLabelNat
 587   let
 588         -- This is how gcc does it, so it can't be that bad:
 589     code dst = x_code dst `appOL` toOL [
 590         LDATA ReadOnlyData16 [
 591                 CmmAlign 16,
 592                 CmmDataLabel lbl,
 593                 CmmStaticLit (CmmInt 0x8000000000000000 W64),
 594                 CmmStaticLit (CmmInt 0 W64)
 595         ],
 596                 -- gcc puts an unpck here.  Wonder if we need it.
 597         XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 598                 -- xorpd, so we need the 128-bit constant
 599         ]
 600   --
 601   return (Any FF64 code)
 602
 603 #endif /* x86_64_TARGET_ARCH */
 604
 605
 606
 607
 608
 609 getRegister (CmmMachOp mop [x]) -- unary MachOps
 610   = case mop of
 611 #if i386_TARGET_ARCH
 612       MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
 613       MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
 614 #endif
 615
 616       MO_S_Neg w -> triv_ucode NEGI (intSize w)
 617       MO_F_Neg w -> triv_ucode NEGI (floatSize w)
 618       MO_Not w   -> triv_ucode NOT  (intSize w)
 619
 620       -- Nop conversions
 621       MO_UU_Conv W32 W8  -> toI8Reg  W32 x
 622       MO_SS_Conv W32 W8  -> toI8Reg  W32 x
 623       MO_UU_Conv W16 W8  -> toI8Reg  W16 x
 624       MO_SS_Conv W16 W8  -> toI8Reg  W16 x
 625       MO_UU_Conv W32 W16 -> toI16Reg W32 x
 626       MO_SS_Conv W32 W16 -> toI16Reg W32 x
 627
 628 #if x86_64_TARGET_ARCH
 629       MO_UU_Conv W64 W32 -> conversionNop II64 x
 630       MO_SS_Conv W64 W32 -> conversionNop II64 x
 631       MO_UU_Conv W64 W16 -> toI16Reg W64 x
 632       MO_SS_Conv W64 W16 -> toI16Reg W64 x
 633       MO_UU_Conv W64 W8  -> toI8Reg  W64 x
 634       MO_SS_Conv W64 W8  -> toI8Reg  W64 x
 635 #endif
 636
 637       MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 638       MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 639
 640       -- widenings
 641       MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
 642       MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
 643       MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
 644
 645       MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
 646       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
 647       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
 648
 649 #if x86_64_TARGET_ARCH
 650       MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
 651       MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
 652       MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
 653       MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
 654       MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
 655       MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
 656         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 657         -- However, we don't want the register allocator to throw it
 658         -- away as an unnecessary reg-to-reg move, so we keep it in
 659         -- the form of a movzl and print it as a movl later.
 660 #endif
 661
 662 #if i386_TARGET_ARCH
 663       MO_FF_Conv W32 W64 -> conversionNop FF64 x
 664       MO_FF_Conv W64 W32 -> conversionNop FF32 x
 665 #else
 666       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
 667       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
 668 #endif
 669
 670       MO_FS_Conv from to -> coerceFP2Int from to x
 671       MO_SF_Conv from to -> coerceInt2FP from to x
 672
 673       other -> pprPanic "getRegister" (pprMachOp mop)
 674    where
 675         triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
 676         triv_ucode instr size = trivialUCode size (instr size) x
 677
 678         -- signed or unsigned extension.
 679         integerExtend :: Width -> Width
 680                       -> (Size -> Operand -> Operand -> Instr)
 681                       -> CmmExpr -> NatM Register
 682         integerExtend from to instr expr = do
 683             (reg,e_code) <- if from == W8 then getByteReg expr
 684                                           else getSomeReg expr
 685             let
 686                 code dst =
 687                   e_code `snocOL`
 688                   instr (intSize from) (OpReg reg) (OpReg dst)
 689             return (Any (intSize to) code)
 690
 691         toI8Reg :: Width -> CmmExpr -> NatM Register
 692         toI8Reg new_rep expr
 693             = do codefn <- getAnyReg expr
 694                  return (Any (intSize new_rep) codefn)
 695                 -- HACK: use getAnyReg to get a byte-addressable register.
 696                 -- If the source was a Fixed register, this will add the
 697                 -- mov instruction to put it into the desired destination.
 698                 -- We're assuming that the destination won't be a fixed
 699                 -- non-byte-addressable register; it won't be, because all
 700                 -- fixed registers are word-sized.
 701
 702         toI16Reg = toI8Reg -- for now
 703
 704         conversionNop :: Size -> CmmExpr -> NatM Register
 705         conversionNop new_size expr
 706             = do e_code <- getRegister expr
 707                  return (swizzleRegisterRep e_code new_size)
 708
 709
 710 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 711   = case mop of
 712       MO_F_Eq w -> condFltReg EQQ x y
 713       MO_F_Ne w -> condFltReg NE x y
 714       MO_F_Gt w -> condFltReg GTT x y
 715       MO_F_Ge w -> condFltReg GE x y
 716       MO_F_Lt w -> condFltReg LTT x y
 717       MO_F_Le w -> condFltReg LE x y
 718
 719       MO_Eq rep   -> condIntReg EQQ x y
 720       MO_Ne rep   -> condIntReg NE x y
 721
 722       MO_S_Gt rep -> condIntReg GTT x y
 723       MO_S_Ge rep -> condIntReg GE x y
 724       MO_S_Lt rep -> condIntReg LTT x y
 725       MO_S_Le rep -> condIntReg LE x y
 726
 727       MO_U_Gt rep -> condIntReg GU  x y
 728       MO_U_Ge rep -> condIntReg GEU x y
 729       MO_U_Lt rep -> condIntReg LU  x y
 730       MO_U_Le rep -> condIntReg LEU x y
 731
 732 #if i386_TARGET_ARCH
 733       MO_F_Add w -> trivialFCode w GADD x y
 734       MO_F_Sub w -> trivialFCode w GSUB x y
 735       MO_F_Quot w -> trivialFCode w GDIV x y
 736       MO_F_Mul w -> trivialFCode w GMUL x y
 737 #endif
 738
 739 #if x86_64_TARGET_ARCH
 740       MO_F_Add w -> trivialFCode w ADD x y
 741       MO_F_Sub w -> trivialFCode w SUB x y
 742       MO_F_Quot w -> trivialFCode w FDIV x y
 743       MO_F_Mul w -> trivialFCode w MUL x y
 744 #endif
 745
 746       MO_Add rep -> add_code rep x y
 747       MO_Sub rep -> sub_code rep x y
 748
 749       MO_S_Quot rep -> div_code rep True  True  x y
 750       MO_S_Rem  rep -> div_code rep True  False x y
 751       MO_U_Quot rep -> div_code rep False True  x y
 752       MO_U_Rem  rep -> div_code rep False False x y
 753
 754       MO_S_MulMayOflo rep -> imulMayOflo rep x y
 755
 756       MO_Mul rep -> triv_op rep IMUL
 757       MO_And rep -> triv_op rep AND
 758       MO_Or  rep -> triv_op rep OR
 759       MO_Xor rep -> triv_op rep XOR
 760
 761         {- Shift ops on x86s have constraints on their source, it
 762            either has to be Imm, CL or 1
 763             => trivialCode is not restrictive enough (sigh.)
 764         -}
 765       MO_Shl rep   -> shift_code rep SHL x y {-False-}
 766       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
 767       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
 768
 769       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
 770   where
 771     --------------------
 772     triv_op width instr = trivialCode width op (Just op) x y
 773                         where op   = instr (intSize width)
 774
 775     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
 776     imulMayOflo rep a b = do
 777          (a_reg, a_code) <- getNonClobberedReg a
 778          b_code <- getAnyReg b
 779          let
 780              shift_amt  = case rep of
 781                            W32 -> 31
 782                            W64 -> 63
 783                            _ -> panic "shift_amt"
 784
 785              size = intSize rep
 786              code = a_code `appOL` b_code eax `appOL`
 787                         toOL [
 788                            IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
 789                            SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
 790                                 -- sign extend lower part
 791                            SUB size (OpReg edx) (OpReg eax)
 792                                 -- compare against upper
 793                            -- eax==0 if high part == sign extended low part
 794                         ]
 795          -- in
 796          return (Fixed size eax code)
 797
 798     --------------------
 799     shift_code :: Width
 800                -> (Size -> Operand -> Operand -> Instr)
 801                -> CmmExpr
 802                -> CmmExpr
 803                -> NatM Register
 804
 805     {- Case1: shift length as immediate -}
 806     shift_code width instr x y@(CmmLit lit) = do
 807           x_code <- getAnyReg x
 808           let
 809                size = intSize width
 810                code dst
 811                   = x_code dst `snocOL`
 812                     instr size (OpImm (litToImm lit)) (OpReg dst)
 813           -- in
 814           return (Any size code)
 815
 816     {- Case2: shift length is complex (non-immediate)
 817       * y must go in %ecx.
 818       * we cannot do y first *and* put its result in %ecx, because
 819         %ecx might be clobbered by x.
 820       * if we do y second, then x cannot be
 821         in a clobbered reg.  Also, we cannot clobber x's reg
 822         with the instruction itself.
 823       * so we can either:
 824         - do y first, put its result in a fresh tmp, then copy it to %ecx later
 825         - do y second and put its result into %ecx.  x gets placed in a fresh
 826           tmp.  This is likely to be better, becuase the reg alloc can
 827           eliminate this reg->reg move here (it won't eliminate the other one,
 828           because the move is into the fixed %ecx).
 829     -}
 830     shift_code width instr x y{-amount-} = do
 831         x_code <- getAnyReg x
 832         let size = intSize width
 833         tmp <- getNewRegNat size
 834         y_code <- getAnyReg y
 835         let
 836            code = x_code tmp `appOL`
 837                   y_code ecx `snocOL`
 838                   instr size (OpReg ecx) (OpReg tmp)
 839         -- in
 840         return (Fixed size tmp code)
 841
 842     --------------------
 843     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
 844     add_code rep x (CmmLit (CmmInt y _))
 845         | is32BitInteger y = add_int rep x y
 846     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
 847       where size = intSize rep
 848
 849     --------------------
 850     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
 851     sub_code rep x (CmmLit (CmmInt y _))
 852         | is32BitInteger (-y) = add_int rep x (-y)
 853     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
 854
 855     -- our three-operand add instruction:
 856     add_int width x y = do
 857         (x_reg, x_code) <- getSomeReg x
 858         let
 859             size = intSize width
 860             imm = ImmInt (fromInteger y)
 861             code dst
 862                = x_code `snocOL`
 863                  LEA size
 864                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
 865                         (OpReg dst)
 866         --
 867         return (Any size code)
 868
 869     ----------------------
 870     div_code width signed quotient x y = do
 871            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
 872            x_code <- getAnyReg x
 873            let
 874              size = intSize width
 875              widen | signed    = CLTD size
 876                    | otherwise = XOR size (OpReg edx) (OpReg edx)
 877
 878              instr | signed    = IDIV
 879                    | otherwise = DIV
 880
 881              code = y_code `appOL`
 882                     x_code eax `appOL`
 883                     toOL [widen, instr size y_op]
 884
 885              result | quotient  = eax
 886                     | otherwise = edx
 887
 888            -- in
 889            return (Fixed size result code)
 890
 891
 892 getRegister (CmmLoad mem pk)
 893   | isFloatType pk
 894   = do
 895     Amode src mem_code <- getAmode mem
 896     let
 897         size = cmmTypeSize pk
 898         code dst = mem_code `snocOL`
 899                    IF_ARCH_i386(GLD size src dst,
 900                                 MOV size (OpAddr src) (OpReg dst))
 901     return (Any size code)
 902
 903 #if i386_TARGET_ARCH
 904 getRegister (CmmLoad mem pk)
 905   | not (isWord64 pk)
 906   = do
 907     code <- intLoadCode instr mem
 908     return (Any size code)
 909   where
 910     width = typeWidth pk
 911     size = intSize width
 912     instr = case width of
 913                 W8     -> MOVZxL II8
 914                 _other -> MOV size
 915         -- We always zero-extend 8-bit loads, if we
 916         -- can't think of anything better.  This is because
 917         -- we can't guarantee access to an 8-bit variant of every register
 918         -- (esi and edi don't have 8-bit variants), so to make things
 919         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
 920 #endif
 921
 922 #if x86_64_TARGET_ARCH
 923 -- Simpler memory load code on x86_64
 924 getRegister (CmmLoad mem pk)
 925   = do
 926     code <- intLoadCode (MOV size) mem
 927     return (Any size code)
 928   where size = intSize $ typeWidth pk
 929 #endif
 930
 931 getRegister (CmmLit (CmmInt 0 width))
 932   = let
 933         size = intSize width
 934
 935         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
 936         adj_size = case size of II64 -> II32; _ -> size
 937         size1 = IF_ARCH_i386( size, adj_size )
 938         code dst
 939            = unitOL (XOR size1 (OpReg dst) (OpReg dst))
 940     in
 941         return (Any size code)
 942
 943 #if x86_64_TARGET_ARCH
 944   -- optimisation for loading small literals on x86_64: take advantage
 945   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
 946   -- instruction forms are shorter.
 947 getRegister (CmmLit lit)
 948   | isWord64 (cmmLitType lit), not (isBigLit lit)
 949   = let
 950         imm = litToImm lit
 951         code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
 952     in
 953         return (Any II64 code)
 954   where
 955    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
 956    isBigLit _ = False
 957         -- note1: not the same as (not.is32BitLit), because that checks for
 958         -- signed literals that fit in 32 bits, but we want unsigned
 959         -- literals here.
 960         -- note2: all labels are small, because we're assuming the
 961         -- small memory model (see gcc docs, -mcmodel=small).
 962 #endif
 963
 964 getRegister (CmmLit lit)
 965   = let
 966         size = cmmTypeSize (cmmLitType lit)
 967         imm = litToImm lit
 968         code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
 969     in
 970         return (Any size code)
 971
 972 getRegister other = pprPanic "getRegister(x86)" (ppr other)
 973
 974
 975 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
 976    -> NatM (Reg -> InstrBlock)
 977 intLoadCode instr mem = do
 978   Amode src mem_code <- getAmode mem
 979   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
 980
 981 -- Compute an expression into *any* register, adding the appropriate
 982 -- move instruction if necessary.
 983 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
 984 getAnyReg expr = do
 985   r <- getRegister expr
 986   anyReg r
 987
 988 anyReg :: Register -> NatM (Reg -> InstrBlock)
 989 anyReg (Any _ code)          = return code
 990 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
 991
 992 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
 993 -- Fixed registers might not be byte-addressable, so we make sure we've
 994 -- got a temporary, inserting an extra reg copy if necessary.
 995 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
 996 #if x86_64_TARGET_ARCH
 997 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
 998 #else
 999 getByteReg expr = do
1000   r <- getRegister expr
1001   case r of
1002     Any rep code -> do
1003         tmp <- getNewRegNat rep
1004         return (tmp, code tmp)
1005     Fixed rep reg code
1006         | isVirtualReg reg -> return (reg,code)
1007         | otherwise -> do
1008             tmp <- getNewRegNat rep
1009             return (tmp, code `snocOL` reg2reg rep reg tmp)
1010         -- ToDo: could optimise slightly by checking for byte-addressable
1011         -- real registers, but that will happen very rarely if at all.
1012 #endif
1013
1014 -- Another variant: this time we want the result in a register that cannot
1015 -- be modified by code to evaluate an arbitrary expression.
1016 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1017 getNonClobberedReg expr = do
1018   r <- getRegister expr
1019   case r of
1020     Any rep code -> do
1021         tmp <- getNewRegNat rep
1022         return (tmp, code tmp)
1023     Fixed rep reg code
1024         -- only free regs can be clobbered
1025         | RegReal (RealRegSingle rr) <- reg
1026         , isFastTrue (freeReg rr)
1027         -> do
1028                 tmp <- getNewRegNat rep
1029                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1030         | otherwise ->
1031                 return (reg, code)
1032
1033 reg2reg :: Size -> Reg -> Reg -> Instr
1034 reg2reg size src dst
1035 #if i386_TARGET_ARCH
1036   | isFloatSize size = GMOV src dst
1037 #endif
1038   | otherwise        = MOV size (OpReg src) (OpReg dst)
1039
1040
1041
1042 --------------------------------------------------------------------------------
1043 getAmode :: CmmExpr -> NatM Amode
1044 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1045
1046 #if x86_64_TARGET_ARCH
1047
1048 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1049                                      CmmLit displacement])
1050     = return $ Amode (ripRel (litToImm displacement)) nilOL
1051
1052 #endif
1053
1054
1055 -- This is all just ridiculous, since it carefully undoes
1056 -- what mangleIndexTree has just done.
1057 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1058   | is32BitLit lit
1059   -- ASSERT(rep == II32)???
1060   = do (x_reg, x_code) <- getSomeReg x
1061        let off = ImmInt (-(fromInteger i))
1062        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1063
1064 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1065   | is32BitLit lit
1066   -- ASSERT(rep == II32)???
1067   = do (x_reg, x_code) <- getSomeReg x
1068        let off = ImmInt (fromInteger i)
1069        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1070
1071 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1072 -- recognised by the next rule.
1073 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1074                                   b@(CmmLit _)])
1075   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1076
1077 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1078                                         [y, CmmLit (CmmInt shift _)]])
1079   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1080   = x86_complex_amode x y shift 0
1081
1082 getAmode (CmmMachOp (MO_Add rep)
1083                 [x, CmmMachOp (MO_Add _)
1084                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1085                          CmmLit (CmmInt offset _)]])
1086   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1087   && is32BitInteger offset
1088   = x86_complex_amode x y shift offset
1089
1090 getAmode (CmmMachOp (MO_Add rep) [x,y])
1091   = x86_complex_amode x y 0 0
1092
1093 getAmode (CmmLit lit) | is32BitLit lit
1094   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1095
1096 getAmode expr = do
1097   (reg,code) <- getSomeReg expr
1098   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1099
1100
1101 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1102 x86_complex_amode base index shift offset
1103   = do (x_reg, x_code) <- getNonClobberedReg base
1104         -- x must be in a temp, because it has to stay live over y_code
1105         -- we could compre x_reg and y_reg and do something better here...
1106        (y_reg, y_code) <- getSomeReg index
1107        let
1108            code = x_code `appOL` y_code
1109            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1110        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1111                code)
1112
1113
1114
1115
1116 -- -----------------------------------------------------------------------------
1117 -- getOperand: sometimes any operand will do.
1118
1119 -- getNonClobberedOperand: the value of the operand will remain valid across
1120 -- the computation of an arbitrary expression, unless the expression
1121 -- is computed directly into a register which the operand refers to
1122 -- (see trivialCode where this function is used for an example).
1123
1124 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1125 #if x86_64_TARGET_ARCH
1126 getNonClobberedOperand (CmmLit lit)
1127   | isSuitableFloatingPointLit lit = do
1128     lbl <- getNewLabelNat
1129     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1130                                            CmmStaticLit lit])
1131     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1132 #endif
1133 getNonClobberedOperand (CmmLit lit)
1134   | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
1135     return (OpImm (litToImm lit), nilOL)
1136 getNonClobberedOperand (CmmLoad mem pk)
1137   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1138     Amode src mem_code <- getAmode mem
1139     (src',save_code) <-
1140         if (amodeCouldBeClobbered src)
1141                 then do
1142                    tmp <- getNewRegNat archWordSize
1143                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1144                            unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1145                 else
1146                    return (src, nilOL)
1147     return (OpAddr src', save_code `appOL` mem_code)
1148 getNonClobberedOperand e = do
1149     (reg, code) <- getNonClobberedReg e
1150     return (OpReg reg, code)
1151
1152 amodeCouldBeClobbered :: AddrMode -> Bool
1153 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1154
1155 regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
1156 regClobbered _ = False
1157
1158 -- getOperand: the operand is not required to remain valid across the
1159 -- computation of an arbitrary expression.
1160 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1161 #if x86_64_TARGET_ARCH
1162 getOperand (CmmLit lit)
1163   | isSuitableFloatingPointLit lit = do
1164     lbl <- getNewLabelNat
1165     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1166                                            CmmStaticLit lit])
1167     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1168 #endif
1169 getOperand (CmmLit lit)
1170   | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
1171     return (OpImm (litToImm lit), nilOL)
1172 getOperand (CmmLoad mem pk)
1173   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1174     Amode src mem_code <- getAmode mem
1175     return (OpAddr src, mem_code)
1176 getOperand e = do
1177     (reg, code) <- getSomeReg e
1178     return (OpReg reg, code)
1179
1180 isOperand :: CmmExpr -> Bool
1181 isOperand (CmmLoad _ _) = True
1182 isOperand (CmmLit lit)  = is32BitLit lit
1183                           || isSuitableFloatingPointLit lit
1184 isOperand _             = False
1185
1186 -- if we want a floating-point literal as an operand, we can
1187 -- use it directly from memory.  However, if the literal is
1188 -- zero, we're better off generating it into a register using
1189 -- xor.
1190 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1191 isSuitableFloatingPointLit _ = False
1192
1193 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1194 getRegOrMem (CmmLoad mem pk)
1195   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1196     Amode src mem_code <- getAmode mem
1197     return (OpAddr src, mem_code)
1198 getRegOrMem e = do
1199     (reg, code) <- getNonClobberedReg e
1200     return (OpReg reg, code)
1201
1202 #if x86_64_TARGET_ARCH
1203 is32BitLit (CmmInt i W64) = is32BitInteger i
1204    -- assume that labels are in the range 0-2^31-1: this assumes the
1205    -- small memory model (see gcc docs, -mcmodel=small).
1206 #endif
1207 is32BitLit x = True
1208
1209
1210
1211
1212 -- Set up a condition code for a conditional branch.
1213
1214 getCondCode :: CmmExpr -> NatM CondCode
1215
1216 -- yes, they really do seem to want exactly the same!
1217
1218 getCondCode (CmmMachOp mop [x, y])
1219   =
1220     case mop of
1221       MO_F_Eq W32 -> condFltCode EQQ x y
1222       MO_F_Ne W32 -> condFltCode NE  x y
1223       MO_F_Gt W32 -> condFltCode GTT x y
1224       MO_F_Ge W32 -> condFltCode GE  x y
1225       MO_F_Lt W32 -> condFltCode LTT x y
1226       MO_F_Le W32 -> condFltCode LE  x y
1227
1228       MO_F_Eq W64 -> condFltCode EQQ x y
1229       MO_F_Ne W64 -> condFltCode NE  x y
1230       MO_F_Gt W64 -> condFltCode GTT x y
1231       MO_F_Ge W64 -> condFltCode GE  x y
1232       MO_F_Lt W64 -> condFltCode LTT x y
1233       MO_F_Le W64 -> condFltCode LE  x y
1234
1235       MO_Eq rep -> condIntCode EQQ  x y
1236       MO_Ne rep -> condIntCode NE   x y
1237
1238       MO_S_Gt rep -> condIntCode GTT  x y
1239       MO_S_Ge rep -> condIntCode GE   x y
1240       MO_S_Lt rep -> condIntCode LTT  x y
1241       MO_S_Le rep -> condIntCode LE   x y
1242
1243       MO_U_Gt rep -> condIntCode GU   x y
1244       MO_U_Ge rep -> condIntCode GEU  x y
1245       MO_U_Lt rep -> condIntCode LU   x y
1246       MO_U_Le rep -> condIntCode LEU  x y
1247
1248       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1249
1250 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1251
1252
1253
1254
1255 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1256 -- passed back up the tree.
1257
1258 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1259
1260 -- memory vs immediate
1261 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1262     Amode x_addr x_code <- getAmode x
1263     let
1264         imm  = litToImm lit
1265         code = x_code `snocOL`
1266                   CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1267     --
1268     return (CondCode False cond code)
1269
1270 -- anything vs zero, using a mask
1271 -- TODO: Add some sanity checking!!!!
1272 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
1273     | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
1274     = do
1275       (x_reg, x_code) <- getSomeReg x
1276       let
1277          code = x_code `snocOL`
1278                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1279       --
1280       return (CondCode False cond code)
1281
1282 -- anything vs zero
1283 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1284     (x_reg, x_code) <- getSomeReg x
1285     let
1286         code = x_code `snocOL`
1287                   TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1288     --
1289     return (CondCode False cond code)
1290
1291 -- anything vs operand
1292 condIntCode cond x y | isOperand y = do
1293     (x_reg, x_code) <- getNonClobberedReg x
1294     (y_op,  y_code) <- getOperand y
1295     let
1296         code = x_code `appOL` y_code `snocOL`
1297                   CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1298     -- in
1299     return (CondCode False cond code)
1300
1301 -- anything vs anything
1302 condIntCode cond x y = do
1303   (y_reg, y_code) <- getNonClobberedReg y
1304   (x_op, x_code) <- getRegOrMem x
1305   let
1306         code = y_code `appOL`
1307                x_code `snocOL`
1308                   CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1309   -- in
1310   return (CondCode False cond code)
1311
1312
1313
1314 --------------------------------------------------------------------------------
1315 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1316
1317 #if i386_TARGET_ARCH
1318 condFltCode cond x y
1319   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1320   (x_reg, x_code) <- getNonClobberedReg x
1321   (y_reg, y_code) <- getSomeReg y
1322   let
1323         code = x_code `appOL` y_code `snocOL`
1324                 GCMP cond x_reg y_reg
1325   -- The GCMP insn does the test and sets the zero flag if comparable
1326   -- and true.  Hence we always supply EQQ as the condition to test.
1327   return (CondCode True EQQ code)
1328
1329 #elif x86_64_TARGET_ARCH
1330 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1331 -- an operand, but the right must be a reg.  We can probably do better
1332 -- than this general case...
1333 condFltCode cond x y = do
1334   (x_reg, x_code) <- getNonClobberedReg x
1335   (y_op, y_code) <- getOperand y
1336   let
1337         code = x_code `appOL`
1338                y_code `snocOL`
1339                   CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1340         -- NB(1): we need to use the unsigned comparison operators on the
1341         -- result of this comparison.
1342   -- in
1343   return (CondCode True (condToUnsigned cond) code)
1344
1345 #else
1346 condFltCode     = panic "X86.condFltCode: not defined"
1347
1348 #endif
1349
1350
1351
1352 -- -----------------------------------------------------------------------------
1353 -- Generating assignments
1354
1355 -- Assignments are really at the heart of the whole code generation
1356 -- business.  Almost all top-level nodes of any real importance are
1357 -- assignments, which correspond to loads, stores, or register
1358 -- transfers.  If we're really lucky, some of the register transfers
1359 -- will go away, because we can use the destination register to
1360 -- complete the code generation for the right hand side.  This only
1361 -- fails when the right hand side is forced into a fixed register
1362 -- (e.g. the result of a call).
1363
1364 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1365 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
1366
1367 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1368 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
1369
1370
1371 -- integer assignment to memory
1372
1373 -- specific case of adding/subtracting an integer to a particular address.
1374 -- ToDo: catch other cases where we can use an operation directly on a memory
1375 -- address.
1376 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1377                                                  CmmLit (CmmInt i _)])
1378    | addr == addr2, pk /= II64 || is32BitInteger i,
1379      Just instr <- check op
1380    = do Amode amode code_addr <- getAmode addr
1381         let code = code_addr `snocOL`
1382                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1383         return code
1384    where
1385         check (MO_Add _) = Just ADD
1386         check (MO_Sub _) = Just SUB
1387         check _ = Nothing
1388         -- ToDo: more?
1389
1390 -- general case
1391 assignMem_IntCode pk addr src = do
1392     Amode addr code_addr <- getAmode addr
1393     (code_src, op_src)   <- get_op_RI src
1394     let
1395         code = code_src `appOL`
1396                code_addr `snocOL`
1397                   MOV pk op_src (OpAddr addr)
1398         -- NOTE: op_src is stable, so it will still be valid
1399         -- after code_addr.  This may involve the introduction
1400         -- of an extra MOV to a temporary register, but we hope
1401         -- the register allocator will get rid of it.
1402     --
1403     return code
1404   where
1405     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
1406     get_op_RI (CmmLit lit) | is32BitLit lit
1407       = return (nilOL, OpImm (litToImm lit))
1408     get_op_RI op
1409       = do (reg,code) <- getNonClobberedReg op
1410            return (code, OpReg reg)
1411
1412
1413 -- Assign; dst is a reg, rhs is mem
1414 assignReg_IntCode pk reg (CmmLoad src _) = do
1415   load_code <- intLoadCode (MOV pk) src
1416   return (load_code (getRegisterReg reg))
1417
1418 -- dst is a reg, but src could be anything
1419 assignReg_IntCode pk reg src = do
1420   code <- getAnyReg src
1421   return (code (getRegisterReg reg))
1422
1423
1424 -- Floating point assignment to memory
1425 assignMem_FltCode pk addr src = do
1426   (src_reg, src_code) <- getNonClobberedReg src
1427   Amode addr addr_code <- getAmode addr
1428   let
1429         code = src_code `appOL`
1430                addr_code `snocOL`
1431                 IF_ARCH_i386(GST pk src_reg addr,
1432                              MOV pk (OpReg src_reg) (OpAddr addr))
1433   return code
1434
1435 -- Floating point assignment to a register/temporary
1436 assignReg_FltCode pk reg src = do
1437   src_code <- getAnyReg src
1438   return (src_code (getRegisterReg reg))
1439
1440
1441 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1442
1443 genJump (CmmLoad mem pk) = do
1444   Amode target code <- getAmode mem
1445   return (code `snocOL` JMP (OpAddr target))
1446
1447 genJump (CmmLit lit) = do
1448   return (unitOL (JMP (OpImm (litToImm lit))))
1449
1450 genJump expr = do
1451   (reg,code) <- getSomeReg expr
1452   return (code `snocOL` JMP (OpReg reg))
1453
1454
1455 -- -----------------------------------------------------------------------------
1456 --  Unconditional branches
1457
1458 genBranch :: BlockId -> NatM InstrBlock
1459 genBranch = return . toOL . mkJumpInstr
1460
1461
1462
1463 -- -----------------------------------------------------------------------------
1464 --  Conditional jumps
1465
1466 {-
1467 Conditional jumps are always to local labels, so we can use branch
1468 instructions.  We peek at the arguments to decide what kind of
1469 comparison to do.
1470
1471 I386: First, we have to ensure that the condition
1472 codes are set according to the supplied comparison operation.
1473 -}
1474
1475 genCondJump
1476     :: BlockId      -- the branch target
1477     -> CmmExpr      -- the condition on which to branch
1478     -> NatM InstrBlock
1479
1480 #if i386_TARGET_ARCH
1481 genCondJump id bool = do
1482   CondCode _ cond code <- getCondCode bool
1483   return (code `snocOL` JXX cond id)
1484
1485 #elif x86_64_TARGET_ARCH
1486 genCondJump id bool = do
1487   CondCode is_float cond cond_code <- getCondCode bool
1488   if not is_float
1489     then
1490         return (cond_code `snocOL` JXX cond id)
1491     else do
1492         lbl <- getBlockIdNat
1493
1494         -- see comment with condFltReg
1495         let code = case cond of
1496                         NE  -> or_unordered
1497                         GU  -> plain_test
1498                         GEU -> plain_test
1499                         _   -> and_ordered
1500
1501             plain_test = unitOL (
1502                   JXX cond id
1503                 )
1504             or_unordered = toOL [
1505                   JXX cond id,
1506                   JXX PARITY id
1507                 ]
1508             and_ordered = toOL [
1509                   JXX PARITY lbl,
1510                   JXX cond id,
1511                   JXX ALWAYS lbl,
1512                   NEWBLOCK lbl
1513                 ]
1514         return (cond_code `appOL` code)
1515
1516 #else
1517 genCondJump     = panic "X86.genCondJump: not defined"
1518
1519 #endif
1520
1521
1522
1523
1524 -- -----------------------------------------------------------------------------
1525 --  Generating C calls
1526
1527 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
1528 -- @get_arg@, which moves the arguments to the correct registers/stack
1529 -- locations.  Apart from that, the code is easy.
1530 --
1531 -- (If applicable) Do not fill the delay slots here; you will confuse the
1532 -- register allocator.
1533
1534 genCCall
1535     :: CmmCallTarget            -- function to call
1536     -> HintedCmmFormals         -- where to put the result
1537     -> HintedCmmActuals         -- arguments (of mixed type)
1538     -> NatM InstrBlock
1539
1540 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1541
1542 #if i386_TARGET_ARCH
1543
1544 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1545         -- write barrier compiles to no code on x86/x86-64;
1546         -- we keep it this long in order to prevent earlier optimisations.
1547
1548 -- we only cope with a single result for foreign calls
1549 genCCall (CmmPrim op) [CmmHinted r _] args = do
1550   l1 <- getNewLabelNat
1551   l2 <- getNewLabelNat
1552   case op of
1553         MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1554         MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1555
1556         MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1557         MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1558
1559         MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1560         MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1561
1562         MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1563         MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1564
1565         other_op    -> outOfLineFloatOp op r args
1566  where
1567   actuallyInlineFloatOp instr size [CmmHinted x _]
1568         = do res <- trivialUFCode size (instr size) x
1569              any <- anyReg res
1570              return (any (getRegisterReg (CmmLocal r)))
1571
1572 genCCall target dest_regs args = do
1573     let
1574         sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1575 #if !darwin_TARGET_OS
1576         tot_arg_size        = sum sizes
1577 #else
1578         raw_arg_size        = sum sizes
1579         tot_arg_size        = roundTo 16 raw_arg_size
1580         arg_pad_size        = tot_arg_size - raw_arg_size
1581     delta0 <- getDeltaNat
1582     setDeltaNat (delta0 - arg_pad_size)
1583 #endif
1584
1585     push_codes <- mapM push_arg (reverse args)
1586     delta <- getDeltaNat
1587
1588     -- in
1589     -- deal with static vs dynamic call targets
1590     (callinsns,cconv) <-
1591       case target of
1592         -- CmmPrim -> ...
1593         CmmCallee (CmmLit (CmmLabel lbl)) conv
1594            -> -- ToDo: stdcall arg sizes
1595               return (unitOL (CALL (Left fn_imm) []), conv)
1596            where fn_imm = ImmCLbl lbl
1597         CmmCallee expr conv
1598            -> do { (dyn_c, dyn_r) <- get_op expr
1599                  ; ASSERT( isWord32 (cmmExprType expr) )
1600                    return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1601
1602     let push_code
1603 #if darwin_TARGET_OS
1604             | arg_pad_size /= 0
1605             = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1606                     DELTA (delta0 - arg_pad_size)]
1607               `appOL` concatOL push_codes
1608             | otherwise
1609 #endif
1610             = concatOL push_codes
1611         call = callinsns `appOL`
1612                toOL (
1613                         -- Deallocate parameters after call for ccall;
1614                         -- but not for stdcall (callee does it)
1615                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
1616                    [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
1617                   ++
1618                   [DELTA (delta + tot_arg_size)]
1619                )
1620     -- in
1621     setDeltaNat (delta + tot_arg_size)
1622
1623     let
1624         -- assign the results, if necessary
1625         assign_code []     = nilOL
1626         assign_code [CmmHinted dest _hint]
1627           | isFloatType ty = unitOL (GMOV fake0 r_dest)
1628           | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1629                                     MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1630           | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1631           where
1632                 ty = localRegType dest
1633                 w  = typeWidth ty
1634                 r_dest_hi = getHiVRegFromLo r_dest
1635                 r_dest    = getRegisterReg (CmmLocal dest)
1636         assign_code many = panic "genCCall.assign_code many"
1637
1638     return (push_code `appOL`
1639             call `appOL`
1640             assign_code dest_regs)
1641
1642   where
1643     arg_size :: CmmType -> Int  -- Width in bytes
1644     arg_size ty = widthInBytes (typeWidth ty)
1645
1646     roundTo a x | x `mod` a == 0 = x
1647                 | otherwise = x + a - (x `mod` a)
1648
1649
1650     push_arg :: HintedCmmActual {-current argument-}
1651                     -> NatM InstrBlock  -- code
1652
1653     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
1654       | isWord64 arg_ty = do
1655         ChildCode64 code r_lo <- iselExpr64 arg
1656         delta <- getDeltaNat
1657         setDeltaNat (delta - 8)
1658         let
1659             r_hi = getHiVRegFromLo r_lo
1660         -- in
1661         return (       code `appOL`
1662                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1663                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1664                              DELTA (delta-8)]
1665             )
1666
1667       | otherwise = do
1668         (code, reg) <- get_op arg
1669         delta <- getDeltaNat
1670         let size = arg_size arg_ty      -- Byte size
1671         setDeltaNat (delta-size)
1672         if (isFloatType arg_ty)
1673            then return (code `appOL`
1674                         toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1675                               DELTA (delta-size),
1676                               GST (floatSize (typeWidth arg_ty))
1677                                   reg (AddrBaseIndex (EABaseReg esp)
1678                                                         EAIndexNone
1679                                                         (ImmInt 0))]
1680                        )
1681            else return (code `snocOL`
1682                         PUSH II32 (OpReg reg) `snocOL`
1683                         DELTA (delta-size)
1684                        )
1685       where
1686          arg_ty = cmmExprType arg
1687
1688     ------------
1689     get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
1690     get_op op = do
1691         (reg,code) <- getSomeReg op
1692         return (code, reg)
1693
1694 #elif x86_64_TARGET_ARCH
1695
1696 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1697         -- write barrier compiles to no code on x86/x86-64;
1698         -- we keep it this long in order to prevent earlier optimisations.
1699
1700
1701 genCCall (CmmPrim op) [CmmHinted r _] args =
1702   outOfLineFloatOp op r args
1703
1704 genCCall target dest_regs args = do
1705
1706         -- load up the register arguments
1707     (stack_args, aregs, fregs, load_args_code)
1708          <- load_args args allArgRegs allFPArgRegs nilOL
1709
1710     let
1711         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
1712         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1713         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1714                 -- for annotating the call instruction with
1715
1716         sse_regs = length fp_regs_used
1717
1718         tot_arg_size = arg_size * length stack_args
1719
1720         -- On entry to the called function, %rsp should be aligned
1721         -- on a 16-byte boundary +8 (i.e. the first stack arg after
1722         -- the return address is 16-byte aligned).  In STG land
1723         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1724         -- need to make sure we push a multiple of 16-bytes of args,
1725         -- plus the return address, to get the correct alignment.
1726         -- Urg, this is hard.  We need to feed the delta back into
1727         -- the arg pushing code.
1728     (real_size, adjust_rsp) <-
1729         if tot_arg_size `rem` 16 == 0
1730             then return (tot_arg_size, nilOL)
1731             else do -- we need to adjust...
1732                 delta <- getDeltaNat
1733                 setDeltaNat (delta-8)
1734                 return (tot_arg_size+8, toOL [
1735                                 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1736                                 DELTA (delta-8)
1737                         ])
1738
1739         -- push the stack args, right to left
1740     push_code <- push_args (reverse stack_args) nilOL
1741     delta <- getDeltaNat
1742
1743     -- deal with static vs dynamic call targets
1744     (callinsns,cconv) <-
1745       case target of
1746         -- CmmPrim -> ...
1747         CmmCallee (CmmLit (CmmLabel lbl)) conv
1748            -> -- ToDo: stdcall arg sizes
1749               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1750            where fn_imm = ImmCLbl lbl
1751         CmmCallee expr conv
1752            -> do (dyn_r, dyn_c) <- getSomeReg expr
1753                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1754
1755     let
1756         -- The x86_64 ABI requires us to set %al to the number of SSE
1757         -- registers that contain arguments, if the called routine
1758         -- is a varargs function.  We don't know whether it's a
1759         -- varargs function or not, so we have to assume it is.
1760         --
1761         -- It's not safe to omit this assignment, even if the number
1762         -- of SSE regs in use is zero.  If %al is larger than 8
1763         -- on entry to a varargs function, seg faults ensue.
1764         assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1765
1766     let call = callinsns `appOL`
1767                toOL (
1768                         -- Deallocate parameters after call for ccall;
1769                         -- but not for stdcall (callee does it)
1770                   (if cconv == StdCallConv || real_size==0 then [] else
1771                    [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1772                   ++
1773                   [DELTA (delta + real_size)]
1774                )
1775     -- in
1776     setDeltaNat (delta + real_size)
1777
1778     let
1779         -- assign the results, if necessary
1780         assign_code []     = nilOL
1781         assign_code [CmmHinted dest _hint] =
1782           case typeWidth rep of
1783                 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1784 v v v v v v v
1785                 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1786 ^ ^ ^ ^ ^ ^ ^
1787                 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1788           where
1789                 rep = localRegType dest
1790                 r_dest = getRegisterReg (CmmLocal dest)
1791         assign_code many = panic "genCCall.assign_code many"
1792
1793     return (load_args_code      `appOL`
1794             adjust_rsp          `appOL`
1795             push_code           `appOL`
1796             assign_eax sse_regs `appOL`
1797             call                `appOL`
1798             assign_code dest_regs)
1799
1800   where
1801     arg_size = 8 -- always, at the mo
1802
1803     load_args :: [CmmHinted CmmExpr]
1804               -> [Reg]                  -- int regs avail for args
1805               -> [Reg]                  -- FP regs avail for args
1806               -> InstrBlock
1807               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1808     load_args args [] [] code     =  return (args, [], [], code)
1809         -- no more regs to use
1810     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
1811         -- no more args to push
1812     load_args ((CmmHinted arg hint) : rest) aregs fregs code
1813         | isFloatType arg_rep =
1814         case fregs of
1815           [] -> push_this_arg
1816           (r:rs) -> do
1817              arg_code <- getAnyReg arg
1818              load_args rest aregs rs (code `appOL` arg_code r)
1819         | otherwise =
1820         case aregs of
1821           [] -> push_this_arg
1822           (r:rs) -> do
1823              arg_code <- getAnyReg arg
1824              load_args rest rs fregs (code `appOL` arg_code r)
1825         where
1826           arg_rep = cmmExprType arg
1827
1828           push_this_arg = do
1829             (args',ars,frs,code') <- load_args rest aregs fregs code
1830             return ((CmmHinted arg hint):args', ars, frs, code')
1831
1832     push_args [] code = return code
1833     push_args ((CmmHinted arg hint):rest) code
1834        | isFloatType arg_rep = do
1835          (arg_reg, arg_code) <- getSomeReg arg
1836          delta <- getDeltaNat
1837          setDeltaNat (delta-arg_size)
1838          let code' = code `appOL` arg_code `appOL` toOL [
1839                         SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1840                         DELTA (delta-arg_size),
1841                         MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
1842          push_args rest code'
1843
1844        | otherwise = do
1845        -- we only ever generate word-sized function arguments.  Promotion
1846        -- has already happened: our Int8# type is kept sign-extended
1847        -- in an Int#, for example.
1848          ASSERT(width == W64) return ()
1849          (arg_op, arg_code) <- getOperand arg
1850          delta <- getDeltaNat
1851          setDeltaNat (delta-arg_size)
1852          let code' = code `appOL` arg_code `appOL` toOL [
1853                                 PUSH II64 arg_op,
1854                                 DELTA (delta-arg_size)]
1855          push_args rest code'
1856         where
1857           arg_rep = cmmExprType arg
1858           width = typeWidth arg_rep
1859
1860 #else
1861 genCCall        = panic "X86.genCCAll: not defined"
1862
1863 #endif /* x86_64_TARGET_ARCH */
1864
1865
1866
1867
1868 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
1869 outOfLineFloatOp mop res args
1870   = do
1871       dflags <- getDynFlagsNat
1872       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1873       let target = CmmCallee targetExpr CCallConv
1874
1875       if isFloat64 (localRegType res)
1876         then
1877           stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
1878         else do
1879           uq <- getUniqueNat
1880           let
1881             tmp = LocalReg uq f64
1882           -- in
1883           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
1884           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
1885           return (code1 `appOL` code2)
1886   where
1887         lbl = mkForeignLabel fn Nothing False IsFunction
1888
1889         fn = case mop of
1890               MO_F32_Sqrt  -> fsLit "sqrtf"
1891               MO_F32_Sin   -> fsLit "sinf"
1892               MO_F32_Cos   -> fsLit "cosf"
1893               MO_F32_Tan   -> fsLit "tanf"
1894               MO_F32_Exp   -> fsLit "expf"
1895               MO_F32_Log   -> fsLit "logf"
1896
1897               MO_F32_Asin  -> fsLit "asinf"
1898               MO_F32_Acos  -> fsLit "acosf"
1899               MO_F32_Atan  -> fsLit "atanf"
1900
1901               MO_F32_Sinh  -> fsLit "sinhf"
1902               MO_F32_Cosh  -> fsLit "coshf"
1903               MO_F32_Tanh  -> fsLit "tanhf"
1904               MO_F32_Pwr   -> fsLit "powf"
1905
1906               MO_F64_Sqrt  -> fsLit "sqrt"
1907               MO_F64_Sin   -> fsLit "sin"
1908               MO_F64_Cos   -> fsLit "cos"
1909               MO_F64_Tan   -> fsLit "tan"
1910               MO_F64_Exp   -> fsLit "exp"
1911               MO_F64_Log   -> fsLit "log"
1912
1913               MO_F64_Asin  -> fsLit "asin"
1914               MO_F64_Acos  -> fsLit "acos"
1915               MO_F64_Atan  -> fsLit "atan"
1916
1917               MO_F64_Sinh  -> fsLit "sinh"
1918               MO_F64_Cosh  -> fsLit "cosh"
1919               MO_F64_Tanh  -> fsLit "tanh"
1920               MO_F64_Pwr   -> fsLit "pow"
1921
1922
1923
1924
1925
1926 -- -----------------------------------------------------------------------------
1927 -- Generating a table-branch
1928
1929 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1930
1931 genSwitch expr ids
1932   | opt_PIC
1933   = do
1934         (reg,e_code) <- getSomeReg expr
1935         lbl <- getNewLabelNat
1936         dflags <- getDynFlagsNat
1937         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1938         (tableReg,t_code) <- getSomeReg $ dynRef
1939         let
1940             jumpTable = map jumpTableEntryRel ids
1941
1942             jumpTableEntryRel Nothing
1943                 = CmmStaticLit (CmmInt 0 wordWidth)
1944             jumpTableEntryRel (Just (BlockId id))
1945                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1946                 where blockLabel = mkAsmTempLabel id
1947
1948             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1949                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
1950
1951 #if x86_64_TARGET_ARCH
1952 #if darwin_TARGET_OS
1953     -- on Mac OS X/x86_64, put the jump table in the text section
1954     -- to work around a limitation of the linker.
1955     -- ld64 is unable to handle the relocations for
1956     --     .quad L1 - L0
1957     -- if L0 is not preceded by a non-anonymous label in its section.
1958
1959             code = e_code `appOL` t_code `appOL` toOL [
1960                             ADD (intSize wordWidth) op (OpReg tableReg),
1961                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
1962                             LDATA Text (CmmDataLabel lbl : jumpTable)
1963                     ]
1964 #else
1965     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1966     -- relocations, hence we only get 32-bit offsets in the jump
1967     -- table. As these offsets are always negative we need to properly
1968     -- sign extend them to 64-bit. This hack should be removed in
1969     -- conjunction with the hack in PprMach.hs/pprDataItem once
1970     -- binutils 2.17 is standard.
1971             code = e_code `appOL` t_code `appOL` toOL [
1972                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1973                             MOVSxL II32
1974                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1975                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
1976                                    (OpReg reg),
1977                             ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1978                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
1979                    ]
1980 #endif
1981 #else
1982             code = e_code `appOL` t_code `appOL` toOL [
1983                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1984                             ADD (intSize wordWidth) op (OpReg tableReg),
1985                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
1986                     ]
1987 #endif
1988         return code
1989   | otherwise
1990   = do
1991         (reg,e_code) <- getSomeReg expr
1992         lbl <- getNewLabelNat
1993         let
1994             jumpTable = map jumpTableEntry ids
1995             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1996             code = e_code `appOL` toOL [
1997                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1998                     JMP_TBL op [ id | Just id <- ids ]
1999                  ]
2000         -- in
2001         return code
2002
2003
2004 -- -----------------------------------------------------------------------------
2005 -- 'condIntReg' and 'condFltReg': condition codes into registers
2006
2007 -- Turn those condition codes into integers now (when they appear on
2008 -- the right hand side of an assignment).
2009 --
2010 -- (If applicable) Do not fill the delay slots here; you will confuse the
2011 -- register allocator.
2012
2013 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2014
2015 condIntReg cond x y = do
2016   CondCode _ cond cond_code <- condIntCode cond x y
2017   tmp <- getNewRegNat II8
2018   let
2019         code dst = cond_code `appOL` toOL [
2020                     SETCC cond (OpReg tmp),
2021                     MOVZxL II8 (OpReg tmp) (OpReg dst)
2022                   ]
2023   -- in
2024   return (Any II32 code)
2025
2026
2027
2028 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2029
2030 #if i386_TARGET_ARCH
2031 condFltReg cond x y = do
2032   CondCode _ cond cond_code <- condFltCode cond x y
2033   tmp <- getNewRegNat II8
2034   let
2035         code dst = cond_code `appOL` toOL [
2036                     SETCC cond (OpReg tmp),
2037                     MOVZxL II8 (OpReg tmp) (OpReg dst)
2038                   ]
2039   -- in
2040   return (Any II32 code)
2041
2042 #elif x86_64_TARGET_ARCH
2043 condFltReg cond x y = do
2044   CondCode _ cond cond_code <- condFltCode cond x y
2045   tmp1 <- getNewRegNat archWordSize
2046   tmp2 <- getNewRegNat archWordSize
2047   let
2048         -- We have to worry about unordered operands (eg. comparisons
2049         -- against NaN).  If the operands are unordered, the comparison
2050         -- sets the parity flag, carry flag and zero flag.
2051         -- All comparisons are supposed to return false for unordered
2052         -- operands except for !=, which returns true.
2053         --
2054         -- Optimisation: we don't have to test the parity flag if we
2055         -- know the test has already excluded the unordered case: eg >
2056         -- and >= test for a zero carry flag, which can only occur for
2057         -- ordered operands.
2058         --
2059         -- ToDo: by reversing comparisons we could avoid testing the
2060         -- parity flag in more cases.
2061
2062         code dst =
2063            cond_code `appOL`
2064              (case cond of
2065                 NE  -> or_unordered dst
2066                 GU  -> plain_test   dst
2067                 GEU -> plain_test   dst
2068                 _   -> and_ordered  dst)
2069
2070         plain_test dst = toOL [
2071                     SETCC cond (OpReg tmp1),
2072                     MOVZxL II8 (OpReg tmp1) (OpReg dst)
2073                  ]
2074         or_unordered dst = toOL [
2075                     SETCC cond (OpReg tmp1),
2076                     SETCC PARITY (OpReg tmp2),
2077                     OR II8 (OpReg tmp1) (OpReg tmp2),
2078                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
2079                   ]
2080         and_ordered dst = toOL [
2081                     SETCC cond (OpReg tmp1),
2082                     SETCC NOTPARITY (OpReg tmp2),
2083                     AND II8 (OpReg tmp1) (OpReg tmp2),
2084                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
2085                   ]
2086   -- in
2087   return (Any II32 code)
2088
2089 #else
2090 condFltReg      = panic "X86.condFltReg: not defined"
2091
2092 #endif
2093
2094
2095
2096
2097 -- -----------------------------------------------------------------------------
2098 -- 'trivial*Code': deal with trivial instructions
2099
2100 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2101 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2102 -- Only look for constants on the right hand side, because that's
2103 -- where the generic optimizer will have put them.
2104
2105 -- Similarly, for unary instructions, we don't have to worry about
2106 -- matching an StInt as the argument, because genericOpt will already
2107 -- have handled the constant-folding.
2108
2109
2110 {-
2111 The Rules of the Game are:
2112
2113 * You cannot assume anything about the destination register dst;
2114   it may be anything, including a fixed reg.
2115
2116 * You may compute an operand into a fixed reg, but you may not
2117   subsequently change the contents of that fixed reg.  If you
2118   want to do so, first copy the value either to a temporary
2119   or into dst.  You are free to modify dst even if it happens
2120   to be a fixed reg -- that's not your problem.
2121
2122 * You cannot assume that a fixed reg will stay live over an
2123   arbitrary computation.  The same applies to the dst reg.
2124
2125 * Temporary regs obtained from getNewRegNat are distinct from
2126   each other and from all other regs, and stay live over
2127   arbitrary computations.
2128
2129 --------------------
2130
2131 SDM's version of The Rules:
2132
2133 * If getRegister returns Any, that means it can generate correct
2134   code which places the result in any register, period.  Even if that
2135   register happens to be read during the computation.
2136
2137   Corollary #1: this means that if you are generating code for an
2138   operation with two arbitrary operands, you cannot assign the result
2139   of the first operand into the destination register before computing
2140   the second operand.  The second operand might require the old value
2141   of the destination register.
2142
2143   Corollary #2: A function might be able to generate more efficient
2144   code if it knows the destination register is a new temporary (and
2145   therefore not read by any of the sub-computations).
2146
2147 * If getRegister returns Any, then the code it generates may modify only:
2148         (a) fresh temporaries
2149         (b) the destination register
2150         (c) known registers (eg. %ecx is used by shifts)
2151   In particular, it may *not* modify global registers, unless the global
2152   register happens to be the destination register.
2153 -}
2154
2155 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
2156   | is32BitLit lit_a = do
2157   b_code <- getAnyReg b
2158   let
2159        code dst
2160          = b_code dst `snocOL`
2161            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2162   -- in
2163   return (Any (intSize width) code)
2164
2165 trivialCode width instr maybe_revinstr a b
2166   = genTrivialCode (intSize width) instr a b
2167
2168 -- This is re-used for floating pt instructions too.
2169 genTrivialCode rep instr a b = do
2170   (b_op, b_code) <- getNonClobberedOperand b
2171   a_code <- getAnyReg a
2172   tmp <- getNewRegNat rep
2173   let
2174      -- We want the value of b to stay alive across the computation of a.
2175      -- But, we want to calculate a straight into the destination register,
2176      -- because the instruction only has two operands (dst := dst `op` src).
2177      -- The troublesome case is when the result of b is in the same register
2178      -- as the destination reg.  In this case, we have to save b in a
2179      -- new temporary across the computation of a.
2180      code dst
2181         | dst `regClashesWithOp` b_op =
2182                 b_code `appOL`
2183                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2184                 a_code dst `snocOL`
2185                 instr (OpReg tmp) (OpReg dst)
2186         | otherwise =
2187                 b_code `appOL`
2188                 a_code dst `snocOL`
2189                 instr b_op (OpReg dst)
2190   -- in
2191   return (Any rep code)
2192
2193 reg `regClashesWithOp` OpReg reg2   = reg == reg2
2194 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2195 reg `regClashesWithOp` _            = False
2196
2197 -----------
2198
2199 trivialUCode rep instr x = do
2200   x_code <- getAnyReg x
2201   let
2202      code dst =
2203         x_code dst `snocOL`
2204         instr (OpReg dst)
2205   return (Any rep code)
2206
2207 -----------
2208
2209 #if i386_TARGET_ARCH
2210
2211 trivialFCode width instr x y = do
2212   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2213   (y_reg, y_code) <- getSomeReg y
2214   let
2215      size = floatSize width
2216      code dst =
2217         x_code `appOL`
2218         y_code `snocOL`
2219         instr size x_reg y_reg dst
2220   return (Any size code)
2221
2222 #endif
2223
2224 #if x86_64_TARGET_ARCH
2225 trivialFCode pk instr x y
2226   = genTrivialCode size (instr size) x y
2227   where size = floatSize pk
2228 #endif
2229
2230 trivialUFCode size instr x = do
2231   (x_reg, x_code) <- getSomeReg x
2232   let
2233      code dst =
2234         x_code `snocOL`
2235         instr x_reg dst
2236   -- in
2237   return (Any size code)
2238
2239
2240 --------------------------------------------------------------------------------
2241 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2242
2243 #if i386_TARGET_ARCH
2244 coerceInt2FP from to x = do
2245   (x_reg, x_code) <- getSomeReg x
2246   let
2247         opc  = case to of W32 -> GITOF; W64 -> GITOD
2248         code dst = x_code `snocOL` opc x_reg dst
2249         -- ToDo: works for non-II32 reps?
2250   return (Any (floatSize to) code)
2251
2252 #elif x86_64_TARGET_ARCH
2253 coerceInt2FP from to x = do
2254   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
2255   let
2256         opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2257         code dst = x_code `snocOL` opc x_op dst
2258   -- in
2259   return (Any (floatSize to) code) -- works even if the destination rep is <II32
2260
2261 #else
2262 coerceInt2FP    = panic "X86.coerceInt2FP: not defined"
2263
2264 #endif
2265
2266
2267
2268
2269 --------------------------------------------------------------------------------
2270 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2271
2272 #if i386_TARGET_ARCH
2273 coerceFP2Int from to x = do
2274   (x_reg, x_code) <- getSomeReg x
2275   let
2276         opc  = case from of W32 -> GFTOI; W64 -> GDTOI
2277         code dst = x_code `snocOL` opc x_reg dst
2278         -- ToDo: works for non-II32 reps?
2279   -- in
2280   return (Any (intSize to) code)
2281
2282 #elif x86_64_TARGET_ARCH
2283 coerceFP2Int from to x = do
2284   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
2285   let
2286         opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2287         code dst = x_code `snocOL` opc x_op dst
2288   -- in
2289   return (Any (intSize to) code) -- works even if the destination rep is <II32
2290
2291 #else
2292 coerceFP2Int    = panic "X86.coerceFP2Int: not defined"
2293
2294 #endif
2295
2296
2297
2298
2299 --------------------------------------------------------------------------------
2300 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2301
2302 #if x86_64_TARGET_ARCH
2303 coerceFP2FP to x = do
2304   (x_reg, x_code) <- getSomeReg x
2305   let
2306         opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2307         code dst = x_code `snocOL` opc x_reg dst
2308   -- in
2309   return (Any (floatSize to) code)
2310
2311 #else
2312 coerceFP2FP     = panic "X86.coerceFP2FP: not defined"
2313
2314 #endif
2315
2316
2317