2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
28 #include "HsVersions.h"
29 #include "nativeGen/NCG.h"
46 -- Our intermediate code:
49 import PprCmm ( pprExpr )
52 import ClosureInfo ( C_SRT(..) )
55 import StaticFlags ( opt_PIC )
56 import ForeignCall ( CCallConv(..) )
59 import qualified Outputable as O
62 import FastBool ( isFastTrue )
63 import Constants ( wORD_SIZE )
66 import Debug.Trace ( trace )
68 import Control.Monad ( mapAndUnzipM )
69 import Data.Maybe ( fromJust )
-- | Code-generate one top-level Cmm item.  A CmmProc is translated
-- block-by-block via basicBlockCodeGen; statics emitted by the blocks
-- are appended after the proc, and on PIC platforms the PIC base is
-- initialized.  CmmData passes through untranslated.
-- NOTE(review): this listing is an excerpt — interior source lines are
-- elided (e.g. the scrutinee line of the Just/Nothing branches below).
78 -> NatM [NatCmmTop Instr]
80 cmmTopCodeGen dynflags
81 (CmmProc info lab params (ListGraph blocks)) = do
82 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
83 picBaseMb <- getPicBaseMaybeNat
84 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
85 tops = proc : concat statics
86 os = platformOS $ targetPlatform dynflags
89 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
90 Nothing -> return tops
92 cmmTopCodeGen _ (CmmData sec dat) = do
93 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate instructions for one basic block.  Code generation may
-- introduce new block boundaries (NEWBLOCK markers), so the resulting
-- instruction stream is re-split into basic blocks with foldrOL, and
-- any LDATA is hoisted out as static data.
98 -> NatM ( [NatBasicBlock Instr]
101 basicBlockCodeGen (BasicBlock id stmts) = do
102 instrs <- stmtsToInstrs stmts
103 -- code generation may introduce new basic block boundaries, which
104 -- are indicated by the NEWBLOCK instruction. We must split up the
105 -- instruction stream into basic blocks again. Also, we extract
108 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- mkBlocks: NEWBLOCK starts a fresh block; LDATA is collected as a
-- static; every other instruction is prepended to the current block.
110 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
111 = ([], BasicBlock id instrs : blocks, statics)
112 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
113 = (instrs, blocks, CmmData sec dat:statics)
114 mkBlocks instr (instrs,blocks,statics)
115 = (instr:instrs, blocks, statics)
117 return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the instruction blocks for a sequence of statements.
-- NOTE(review): the defining LHS line is elided in this listing.
120 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
122 = do instrss <- mapM stmtToInstrs stmts
123 return (concatOL instrss)
-- | Select instructions for a single Cmm statement: register/memory
-- assignments dispatch on the value's type (float / 64-bit / integer),
-- calls and control flow go to the gen* helpers.
-- NOTE(review): the WORD_SIZE_IN_BITS==32 CPP blocks look unbalanced
-- because the matching #endif lines fall on elided source lines.
126 stmtToInstrs :: CmmStmt -> NatM InstrBlock
127 stmtToInstrs stmt = case stmt of
128 CmmNop -> return nilOL
129 CmmComment s -> return (unitOL (COMMENT s))
132 | isFloatType ty -> assignReg_FltCode size reg src
133 #if WORD_SIZE_IN_BITS==32
134 | isWord64 ty -> assignReg_I64Code reg src
136 | otherwise -> assignReg_IntCode size reg src
137 where ty = cmmRegType reg
138 size = cmmTypeSize ty
141 | isFloatType ty -> assignMem_FltCode size addr src
142 #if WORD_SIZE_IN_BITS==32
143 | isWord64 ty -> assignMem_I64Code addr src
145 | otherwise -> assignMem_IntCode size addr src
146 where ty = cmmExprType src
147 size = cmmTypeSize ty
149 CmmCall target result_regs args _ _
150 -> genCCall target result_regs args
152 CmmBranch id -> genBranch id
153 CmmCondBranch arg id -> genCondJump id arg
154 CmmSwitch arg ids -> genSwitch arg ids
155 CmmJump arg params -> genJump arg
157 panic "stmtToInstrs: return statement should have been cps'd away"
160 --------------------------------------------------------------------------------
161 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
162 -- They are really trees of insns to facilitate fast appending, where a
163 -- left-to-right traversal yields the insns in the correct order.
-- NOTE(review): the 'data' header lines of the declarations below fall
-- on elided source lines; only constructors and commentary are visible.
169 -- | Condition codes passed up the tree.
172 = CondCode Bool Cond InstrBlock
175 -- | a.k.a "Register64"
176 -- Reg is the lower 32-bit temporary which contains the result.
177 -- Use getHiVRegFromLo to find the other VRegUnique.
179 -- Rules of this simplified insn selection game are therefore that
180 -- the returned Reg may be modified
188 -- | Registers passed up the tree. If the stix code forces the register
189 -- to live in a pre-decided machine register, it comes out as @Fixed@;
190 -- otherwise, it comes out as @Any@, and the parent can decide which
191 -- register to put it in.
194 = Fixed Size Reg InstrBlock
195 | Any Size (Reg -> InstrBlock)
-- | Replace the width annotation carried by a 'Register', leaving the
-- underlying register / code-generation function untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep reg size = case reg of
  Fixed _ r code -> Fixed size r code
  Any _ codefn   -> Any size codefn
203 -- | Grab the Reg for a CmmReg
-- Local registers become virtual registers sized by their Cmm type;
-- a global (magic) register must map to a real machine register on
-- this platform, otherwise we panic.
204 getRegisterReg :: CmmReg -> Reg
206 getRegisterReg (CmmLocal (LocalReg u pk))
207 = mkVReg u (cmmTypeSize pk)
209 getRegisterReg (CmmGlobal mid)
210 = case get_GlobalReg_reg_or_addr mid of
211 Left (RealReg rrno) -> RealReg rrno
212 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
213 -- By this stage, the only MagicIds remaining should be the
214 -- ones which map to a real machine register on this
215 -- platform. Hence ...
218 -- | Memory addressing modes passed up the tree.
-- NOTE(review): the 'data Amode' header line is elided in this listing.
220 = Amode AddrMode InstrBlock
223 Now, given a tree (the argument to an CmmLoad) that references memory,
224 produce a suitable addressing mode.
226 A Rule of the Game (tm) for Amodes: use of the addr bit must
227 immediately follow use of the code part, since the code part puts
228 values in registers which the addr then refers to. So you can't put
229 anything in between, lest it overwrite some of those registers. If
230 you need to do some other computation between the code part and use of
231 the addr bit, first store the effective address from the amode in a
232 temporary, then do the other computation, and then use the temporary:
236 ... other computation ...
-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64 first.  This is
-- important because e.g. -1 as a CmmInt might arrive as either
-- -1 or 18446744073709551615; both truncate to the same Int64.
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= truncated && truncated <= 0x7fffffff
  where truncated = fromIntegral i :: Int64
-- | Convert a BlockId to some CmmStatic data: a reference to the
-- block's assembler-temporary label, or a zero word when the entry
-- is absent.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbBlockId = case mbBlockId of
  Nothing            -> CmmStaticLit (CmmInt 0 wordWidth)
  Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
259 -- -----------------------------------------------------------------------------
260 -- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?

-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition
-- @reg + offset@ at the register's natural width.  Any other shape of
-- expression is a caller error; report it via 'panic' with a message
-- rather than an opaque pattern-match failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
mangleIndexTree _
  = panic "X86.CodeGen.mangleIndexTree: no match"
269 -- | The dual to getAnyReg: compute an expression into a register, but
270 -- we don't mind which one it is.
-- NOTE(review): the case alternatives on the Register result are
-- partially elided here; the visible code allocates a fresh temporary
-- and runs the Any-style code into it.
271 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
273 r <- getRegister expr
276 tmp <- getNewRegNat rep
277 return (tmp, code tmp)
-- | Store a 64-bit value to memory on a 32-bit target as two 32-bit
-- moves: low word at the address, high word at offset +4
-- (little-endian layout).
285 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
286 assignMem_I64Code addrTree valueTree = do
287 Amode addr addr_code <- getAmode addrTree
288 ChildCode64 vcode rlo <- iselExpr64 valueTree
290 rhi = getHiVRegFromLo rlo
292 -- Little-endian store
293 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
-- NOTE(review): fromJust assumes addrOffset always succeeds for a +4
-- displacement — confirm that invariant holds for every Amode.
294 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
296 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register on a 32-bit target:
-- copy the low and high halves as two separate 32-bit reg-to-reg
-- moves.  Only local registers are valid lvalues here.
299 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
300 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
301 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
303 r_dst_lo = mkVReg u_dst II32
304 r_dst_hi = getHiVRegFromLo r_dst_lo
305 r_src_hi = getHiVRegFromLo r_src_lo
306 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
307 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
310 vcode `snocOL` mov_lo `snocOL` mov_hi
313 assignReg_I64Code lvalue valueTree
314 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | Select code for a 64-bit expression on a 32-bit target.  The
-- returned ChildCode64 carries the low 32-bit register; the high half
-- is always recovered via getHiVRegFromLo.  Literals, loads, local
-- registers, addition and unsigned widening are handled; everything
-- else panics.
319 iselExpr64 :: CmmExpr -> NatM ChildCode64
-- 64-bit literal: split into low word r and high word q.
320 iselExpr64 (CmmLit (CmmInt i _)) = do
321 (rlo,rhi) <- getNewRegPairNat II32
323 r = fromIntegral (fromIntegral i :: Word32)
324 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
326 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
327 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
330 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, high word at offset +4.
332 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
333 Amode addr addr_code <- getAmode addrTree
334 (rlo,rhi) <- getNewRegPairNat II32
336 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
337 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
340 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
344 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
345 = return (ChildCode64 nilOL (mkVReg vu II32))
347 -- we handle addition, but rather badly
-- Add a 64-bit constant: ADD on the low word, ADC to propagate the
-- carry into the high word.
348 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
349 ChildCode64 code1 r1lo <- iselExpr64 e1
350 (rlo,rhi) <- getNewRegPairNat II32
352 r = fromIntegral (fromIntegral i :: Word32)
353 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
354 r1hi = getHiVRegFromLo r1lo
356 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
357 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
358 MOV II32 (OpReg r1hi) (OpReg rhi),
359 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
361 return (ChildCode64 code rlo)
-- General 64-bit addition: same ADD/ADC pattern with two operands.
363 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
364 ChildCode64 code1 r1lo <- iselExpr64 e1
365 ChildCode64 code2 r2lo <- iselExpr64 e2
366 (rlo,rhi) <- getNewRegPairNat II32
368 r1hi = getHiVRegFromLo r1lo
369 r2hi = getHiVRegFromLo r2lo
372 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
373 ADD II32 (OpReg r2lo) (OpReg rlo),
374 MOV II32 (OpReg r1hi) (OpReg rhi),
375 ADC II32 (OpReg r2hi) (OpReg rhi) ]
377 return (ChildCode64 code rlo)
-- Unsigned widening to 64 bits: zero the high word.
379 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
381 r_dst_lo <- getNewRegNat II32
382 let r_dst_hi = getHiVRegFromLo r_dst_lo
385 ChildCode64 (code `snocOL`
386 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
391 = pprPanic "iselExpr64(i386)" (ppr expr)
395 --------------------------------------------------------------------------------
-- | Turn a Cmm expression into a Register: Fixed when the value is
-- pinned to a specific machine register, Any when the parent may
-- choose the destination.
396 getRegister :: CmmExpr -> NatM Register
398 #if !x86_64_TARGET_ARCH
399 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
400 -- register, it can only be used for rip-relative addressing.
401 getRegister (CmmReg (CmmGlobal PicBaseReg))
403 reg <- getPicBaseNat archWordSize
404 return (Fixed archWordSize reg nilOL)
407 getRegister (CmmReg reg)
408 = return (Fixed (cmmTypeSize (cmmRegType reg))
409 (getRegisterReg reg) nilOL)
411 getRegister tree@(CmmRegOff _ _)
412 = getRegister (mangleIndexTree tree)
415 #if WORD_SIZE_IN_BITS==32
416 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
417 -- TO_W_(x), TO_W_(x >> 32)
419 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
420 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
421 ChildCode64 code rlo <- iselExpr64 x
422 return $ Fixed II32 (getHiVRegFromLo rlo) code
424 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
425 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
426 ChildCode64 code rlo <- iselExpr64 x
427 return $ Fixed II32 (getHiVRegFromLo rlo) code
429 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
430 ChildCode64 code rlo <- iselExpr64 x
431 return $ Fixed II32 rlo code
433 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
434 ChildCode64 code rlo <- iselExpr64 x
435 return $ Fixed II32 rlo code
-- Float literals: on i386 loaded from a static via a dynamic
-- reference (with GLDZ/GLD1 shortcuts on elided lines for 0.0/1.0);
-- on x86_64 via rip-relative addressing, with xor-zeroing for 0.0.
444 getRegister (CmmLit (CmmFloat f W32)) = do
445 lbl <- getNewLabelNat
446 dflags <- getDynFlagsNat
447 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
448 Amode addr addr_code <- getAmode dynRef
452 CmmStaticLit (CmmFloat f W32)]
453 `consOL` (addr_code `snocOL`
456 return (Any FF32 code)
459 getRegister (CmmLit (CmmFloat d W64))
461 = let code dst = unitOL (GLDZ dst)
462 in return (Any FF64 code)
465 = let code dst = unitOL (GLD1 dst)
466 in return (Any FF64 code)
469 lbl <- getNewLabelNat
470 dflags <- getDynFlagsNat
471 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
472 Amode addr addr_code <- getAmode dynRef
476 CmmStaticLit (CmmFloat d W64)]
477 `consOL` (addr_code `snocOL`
480 return (Any FF64 code)
482 #endif /* i386_TARGET_ARCH */
487 #if x86_64_TARGET_ARCH
488 getRegister (CmmLit (CmmFloat 0.0 w)) = do
489 let size = floatSize w
490 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
491 -- I don't know why there are xorpd, xorps, and pxor instructions.
492 -- They all appear to do the same thing --SDM
493 return (Any size code)
495 getRegister (CmmLit (CmmFloat f w)) = do
496 lbl <- getNewLabelNat
497 let code dst = toOL [
500 CmmStaticLit (CmmFloat f w)],
501 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
504 return (Any size code)
505 where size = floatSize w
507 #endif /* x86_64_TARGET_ARCH */
513 -- catch simple cases of zero- or sign-extended load
514 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
515 code <- intLoadCode (MOVZxL II8) addr
516 return (Any II32 code)
518 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
519 code <- intLoadCode (MOVSxL II8) addr
520 return (Any II32 code)
522 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
523 code <- intLoadCode (MOVZxL II16) addr
524 return (Any II32 code)
526 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
527 code <- intLoadCode (MOVSxL II16) addr
528 return (Any II32 code)
531 #if x86_64_TARGET_ARCH
533 -- catch simple cases of zero- or sign-extended load
534 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
535 code <- intLoadCode (MOVZxL II8) addr
536 return (Any II64 code)
538 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
539 code <- intLoadCode (MOVSxL II8) addr
540 return (Any II64 code)
542 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
543 code <- intLoadCode (MOVZxL II16) addr
544 return (Any II64 code)
546 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
547 code <- intLoadCode (MOVSxL II16) addr
548 return (Any II64 code)
550 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
551 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
552 return (Any II64 code)
554 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
555 code <- intLoadCode (MOVSxL II32) addr
556 return (Any II64 code)
-- PIC base + displacement folds into a single rip-relative LEA.
558 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
559 CmmLit displacement])
560 = return $ Any II64 (\dst -> unitOL $
561 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-- SSE float negation: xor the sign bit in from a 128-bit constant.
563 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
564 x_code <- getAnyReg x
565 lbl <- getNewLabelNat
567 code dst = x_code dst `appOL` toOL [
568 -- This is how gcc does it, so it can't be that bad:
569 LDATA ReadOnlyData16 [
572 CmmStaticLit (CmmInt 0x80000000 W32),
573 CmmStaticLit (CmmInt 0 W32),
574 CmmStaticLit (CmmInt 0 W32),
575 CmmStaticLit (CmmInt 0 W32)
577 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
578 -- xorps, so we need the 128-bit constant
579 -- ToDo: rip-relative
582 return (Any FF32 code)
584 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
585 x_code <- getAnyReg x
586 lbl <- getNewLabelNat
588 -- This is how gcc does it, so it can't be that bad:
589 code dst = x_code dst `appOL` toOL [
590 LDATA ReadOnlyData16 [
593 CmmStaticLit (CmmInt 0x8000000000000000 W64),
594 CmmStaticLit (CmmInt 0 W64)
596 -- gcc puts an unpck here. Wonder if we need it.
597 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
598 -- xorpd, so we need the 128-bit constant
601 return (Any FF64 code)
-- Unary MachOps: negation, bitwise complement, and the full matrix of
-- integer/float width conversions.  Narrowing to 8/16 bits is done by
-- re-typing the register (toI8Reg/toI16Reg); widening uses MOVZxL /
-- MOVSxL via integerExtend.
609 getRegister (CmmMachOp mop [x]) -- unary MachOps
612 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
613 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
616 MO_S_Neg w -> triv_ucode NEGI (intSize w)
617 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
618 MO_Not w -> triv_ucode NOT (intSize w)
621 MO_UU_Conv W32 W8 -> toI8Reg W32 x
622 MO_SS_Conv W32 W8 -> toI8Reg W32 x
623 MO_UU_Conv W16 W8 -> toI8Reg W16 x
624 MO_SS_Conv W16 W8 -> toI8Reg W16 x
625 MO_UU_Conv W32 W16 -> toI16Reg W32 x
626 MO_SS_Conv W32 W16 -> toI16Reg W32 x
628 #if x86_64_TARGET_ARCH
629 MO_UU_Conv W64 W32 -> conversionNop II64 x
630 MO_SS_Conv W64 W32 -> conversionNop II64 x
631 MO_UU_Conv W64 W16 -> toI16Reg W64 x
632 MO_SS_Conv W64 W16 -> toI16Reg W64 x
633 MO_UU_Conv W64 W8 -> toI8Reg W64 x
634 MO_SS_Conv W64 W8 -> toI8Reg W64 x
637 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
638 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
641 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
642 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
643 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
645 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
646 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
647 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
649 #if x86_64_TARGET_ARCH
650 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
651 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
652 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
653 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
654 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
655 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
656 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
657 -- However, we don't want the register allocator to throw it
658 -- away as an unnecessary reg-to-reg move, so we keep it in
659 -- the form of a movzl and print it as a movl later.
663 MO_FF_Conv W32 W64 -> conversionNop FF64 x
664 MO_FF_Conv W64 W32 -> conversionNop FF32 x
666 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
667 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
670 MO_FS_Conv from to -> coerceFP2Int from to x
671 MO_SF_Conv from to -> coerceInt2FP from to x
673 other -> pprPanic "getRegister" (pprMachOp mop)
-- Helper: apply a simple one-operand instruction at the given size.
675 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
676 triv_ucode instr size = trivialUCode size (instr size) x
678 -- signed or unsigned extension.
679 integerExtend :: Width -> Width
680 -> (Size -> Operand -> Operand -> Instr)
681 -> CmmExpr -> NatM Register
682 integerExtend from to instr expr = do
683 (reg,e_code) <- if from == W8 then getByteReg expr
688 instr (intSize from) (OpReg reg) (OpReg dst)
689 return (Any (intSize to) code)
691 toI8Reg :: Width -> CmmExpr -> NatM Register
693 = do codefn <- getAnyReg expr
694 return (Any (intSize new_rep) codefn)
695 -- HACK: use getAnyReg to get a byte-addressable register.
696 -- If the source was a Fixed register, this will add the
697 -- mov instruction to put it into the desired destination.
698 -- We're assuming that the destination won't be a fixed
699 -- non-byte-addressable register; it won't be, because all
700 -- fixed registers are word-sized.
702 toI16Reg = toI8Reg -- for now
704 conversionNop :: Size -> CmmExpr -> NatM Register
705 conversionNop new_size expr
706 = do e_code <- getRegister expr
707 return (swizzleRegisterRep e_code new_size)
-- Dyadic MachOps: comparisons, float arithmetic (x87 on i386, SSE on
-- x86_64), integer arithmetic, shifts and division.  Helpers follow
-- in the where-clause.
710 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
712 MO_F_Eq w -> condFltReg EQQ x y
713 MO_F_Ne w -> condFltReg NE x y
714 MO_F_Gt w -> condFltReg GTT x y
715 MO_F_Ge w -> condFltReg GE x y
716 MO_F_Lt w -> condFltReg LTT x y
717 MO_F_Le w -> condFltReg LE x y
719 MO_Eq rep -> condIntReg EQQ x y
720 MO_Ne rep -> condIntReg NE x y
722 MO_S_Gt rep -> condIntReg GTT x y
723 MO_S_Ge rep -> condIntReg GE x y
724 MO_S_Lt rep -> condIntReg LTT x y
725 MO_S_Le rep -> condIntReg LE x y
727 MO_U_Gt rep -> condIntReg GU x y
728 MO_U_Ge rep -> condIntReg GEU x y
729 MO_U_Lt rep -> condIntReg LU x y
730 MO_U_Le rep -> condIntReg LEU x y
733 MO_F_Add w -> trivialFCode w GADD x y
734 MO_F_Sub w -> trivialFCode w GSUB x y
735 MO_F_Quot w -> trivialFCode w GDIV x y
736 MO_F_Mul w -> trivialFCode w GMUL x y
739 #if x86_64_TARGET_ARCH
740 MO_F_Add w -> trivialFCode w ADD x y
741 MO_F_Sub w -> trivialFCode w SUB x y
742 MO_F_Quot w -> trivialFCode w FDIV x y
743 MO_F_Mul w -> trivialFCode w MUL x y
746 MO_Add rep -> add_code rep x y
747 MO_Sub rep -> sub_code rep x y
749 MO_S_Quot rep -> div_code rep True True x y
750 MO_S_Rem rep -> div_code rep True False x y
751 MO_U_Quot rep -> div_code rep False True x y
752 MO_U_Rem rep -> div_code rep False False x y
754 MO_S_MulMayOflo rep -> imulMayOflo rep x y
756 MO_Mul rep -> triv_op rep IMUL
757 MO_And rep -> triv_op rep AND
758 MO_Or rep -> triv_op rep OR
759 MO_Xor rep -> triv_op rep XOR
761 {- Shift ops on x86s have constraints on their source, it
762 either has to be Imm, CL or 1
763 => trivialCode is not restrictive enough (sigh.)
765 MO_Shl rep -> shift_code rep SHL x y {-False-}
766 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
767 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
769 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
772 triv_op width instr = trivialCode width op (Just op) x y
773 where op = instr (intSize width)
-- Overflow check for signed multiply: widen via IMUL2 into %edx:%eax
-- and compare the high part with the sign-extension of the low part.
775 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
776 imulMayOflo rep a b = do
777 (a_reg, a_code) <- getNonClobberedReg a
778 b_code <- getAnyReg b
780 shift_amt = case rep of
783 _ -> panic "shift_amt"
786 code = a_code `appOL` b_code eax `appOL`
788 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
789 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
790 -- sign extend lower part
791 SUB size (OpReg edx) (OpReg eax)
792 -- compare against upper
793 -- eax==0 if high part == sign extended low part
796 return (Fixed size eax code)
800 -> (Size -> Operand -> Operand -> Instr)
805 {- Case1: shift length as immediate -}
806 shift_code width instr x y@(CmmLit lit) = do
807 x_code <- getAnyReg x
811 = x_code dst `snocOL`
812 instr size (OpImm (litToImm lit)) (OpReg dst)
814 return (Any size code)
816 {- Case2: shift length is complex (non-immediate)
818 * we cannot do y first *and* put its result in %ecx, because
819 %ecx might be clobbered by x.
820 * if we do y second, then x cannot be
821 in a clobbered reg. Also, we cannot clobber x's reg
822 with the instruction itself.
824 - do y first, put its result in a fresh tmp, then copy it to %ecx later
825 - do y second and put its result into %ecx. x gets placed in a fresh
826 tmp. This is likely to be better, because the reg alloc can
827 eliminate this reg->reg move here (it won't eliminate the other one,
828 because the move is into the fixed %ecx).
830 shift_code width instr x y{-amount-} = do
831 x_code <- getAnyReg x
832 let size = intSize width
833 tmp <- getNewRegNat size
834 y_code <- getAnyReg y
836 code = x_code tmp `appOL`
838 instr size (OpReg ecx) (OpReg tmp)
840 return (Fixed size tmp code)
-- Addition with a 32-bit immediate goes through the 3-operand LEA
-- form (add_int); everything else is a plain ADD.
843 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
844 add_code rep x (CmmLit (CmmInt y _))
845 | is32BitInteger y = add_int rep x y
846 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
847 where size = intSize rep
-- Subtracting a constant becomes adding its negation (when that
-- negation still fits in 32 bits).
850 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
851 sub_code rep x (CmmLit (CmmInt y _))
852 | is32BitInteger (-y) = add_int rep x (-y)
853 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
855 -- our three-operand add instruction:
856 add_int width x y = do
857 (x_reg, x_code) <- getSomeReg x
860 imm = ImmInt (fromInteger y)
864 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
867 return (Any size code)
869 ----------------------
-- Division: dividend in %eax, widened into %edx (CLTD for signed,
-- xor %edx for unsigned); IDIV/DIV leave quotient in %eax and
-- remainder in %edx.
870 div_code width signed quotient x y = do
871 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
872 x_code <- getAnyReg x
875 widen | signed = CLTD size
876 | otherwise = XOR size (OpReg edx) (OpReg edx)
878 instr | signed = IDIV
881 code = y_code `appOL`
883 toOL [widen, instr size y_op]
885 result | quotient = eax
889 return (Fixed size result code)
-- Plain loads and literals.  Float loads use GLD on i386 and MOV on
-- x86_64 (IF_ARCH_i386 selects at build time).
892 getRegister (CmmLoad mem pk)
895 Amode src mem_code <- getAmode mem
897 size = cmmTypeSize pk
898 code dst = mem_code `snocOL`
899 IF_ARCH_i386(GLD size src dst,
900 MOV size (OpAddr src) (OpReg dst))
901 return (Any size code)
904 getRegister (CmmLoad mem pk)
907 code <- intLoadCode instr mem
908 return (Any size code)
912 instr = case width of
915 -- We always zero-extend 8-bit loads, if we
916 -- can't think of anything better. This is because
917 -- we can't guarantee access to an 8-bit variant of every register
918 -- (esi and edi don't have 8-bit variants), so to make things
919 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
922 #if x86_64_TARGET_ARCH
923 -- Simpler memory load code on x86_64
924 getRegister (CmmLoad mem pk)
926 code <- intLoadCode (MOV size) mem
927 return (Any size code)
928 where size = intSize $ typeWidth pk
-- Zero literal: xor dst,dst is shorter than mov $0; on x86_64 the
-- 32-bit form is used since it zero-extends to 64 bits anyway.
931 getRegister (CmmLit (CmmInt 0 width))
935 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
936 adj_size = case size of II64 -> II32; _ -> size
937 size1 = IF_ARCH_i386( size, adj_size )
939 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
941 return (Any size code)
943 #if x86_64_TARGET_ARCH
944 -- optimisation for loading small literals on x86_64: take advantage
945 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
946 -- instruction forms are shorter.
947 getRegister (CmmLit lit)
948 | isWord64 (cmmLitType lit), not (isBigLit lit)
951 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
953 return (Any II64 code)
955 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
957 -- note1: not the same as (not.is32BitLit), because that checks for
958 -- signed literals that fit in 32 bits, but we want unsigned
960 -- note2: all labels are small, because we're assuming the
961 -- small memory model (see gcc docs, -mcmodel=small).
964 getRegister (CmmLit lit)
966 size = cmmTypeSize (cmmLitType lit)
968 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
970 return (Any size code)
972 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a loader for an integer value: compute the address with
-- 'getAmode', then apply the supplied move-style instruction from
-- that address into whichever destination register the caller picks.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
  return load
981 -- Compute an expression into *any* register, adding the appropriate
982 -- move instruction if necessary.
983 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
985 r <- getRegister expr
-- anyReg adapts a Register to the (Reg -> InstrBlock) shape: an Any
-- passes straight through, a Fixed gets a trailing reg-to-reg move.
988 anyReg :: Register -> NatM (Reg -> InstrBlock)
989 anyReg (Any _ code) = return code
990 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
992 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
993 -- Fixed registers might not be byte-addressable, so we make sure we've
994 -- got a temporary, inserting an extra reg copy if necessary.
995 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
996 #if x86_64_TARGET_ARCH
997 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: virtual registers are fine as-is; fixed real registers
-- are copied into a fresh temporary.
1000 r <- getRegister expr
1003 tmp <- getNewRegNat rep
1004 return (tmp, code tmp)
1006 | isVirtualReg reg -> return (reg,code)
1008 tmp <- getNewRegNat rep
1009 return (tmp, code `snocOL` reg2reg rep reg tmp)
1010 -- ToDo: could optimise slightly by checking for byte-addressable
1011 -- real registers, but that will happen very rarely if at all.
1014 -- Another variant: this time we want the result in a register that cannot
1015 -- be modified by code to evaluate an arbitrary expression.
1016 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1017 getNonClobberedReg expr = do
1018 r <- getRegister expr
1021 tmp <- getNewRegNat rep
1022 return (tmp, code tmp)
1024 -- only free regs can be clobbered
-- A Fixed result in a free real register is copied to a fresh
-- temporary so later code cannot clobber it.
1025 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1026 tmp <- getNewRegNat rep
1027 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A single register-to-register move; floats on i386 use the x87
-- GMOV form.  (The matching #else/#endif of the CPP block fall on
-- elided source lines.)
1031 reg2reg :: Size -> Reg -> Reg -> Instr
1032 reg2reg size src dst
1033 #if i386_TARGET_ARCH
1034 | isFloatSize size = GMOV src dst
1036 | otherwise = MOV size (OpReg src) (OpReg dst)
1040 --------------------------------------------------------------------------------
-- | Produce an addressing mode for a memory-referencing expression,
-- recognising base+offset, base+index<<scale, and rip-relative
-- (x86_64 PIC) shapes.
1041 getAmode :: CmmExpr -> NatM Amode
1042 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1044 #if x86_64_TARGET_ARCH
1046 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1047 CmmLit displacement])
1048 = return $ Amode (ripRel (litToImm displacement)) nilOL
1053 -- This is all just ridiculous, since it carefully undoes
1054 -- what mangleIndexTree has just done.
1055 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1057 -- ASSERT(rep == II32)???
1058 = do (x_reg, x_code) <- getSomeReg x
1059 let off = ImmInt (-(fromInteger i))
1060 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1062 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1064 -- ASSERT(rep == II32)???
1065 = do (x_reg, x_code) <- getSomeReg x
1066 let off = ImmInt (fromInteger i)
1067 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1069 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1070 -- recognised by the next rule.
1071 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1073 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1075 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1076 [y, CmmLit (CmmInt shift _)]])
1077 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1078 = x86_complex_amode x y shift 0
-- base + (index << scale) + offset.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  -- NB: the shift tests must be parenthesised: (&&) binds tighter
  -- than (||), so without the parens the is32BitInteger guard only
  -- applied to the shift==3 case, and an over-wide offset for shift
  -- 0/1/2 would be silently truncated by x86_complex_amode's
  -- fromIntegral.
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
-- Fallbacks: plain addition uses a zero-scale complex amode, small
-- literals become absolute addresses, and anything else is computed
-- into a register used as the base.
1088 getAmode (CmmMachOp (MO_Add rep) [x,y])
1089 = x86_complex_amode x y 0 0
1091 getAmode (CmmLit lit) | is32BitLit lit
1092 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1095 (reg,code) <- getSomeReg expr
1096 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + index*scale + offset addressing mode.  'shift' is
-- the log2 of the scale (0..3 -> scale 1/2/4/8).
1099 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1100 x86_complex_amode base index shift offset
1101 = do (x_reg, x_code) <- getNonClobberedReg base
1102 -- x must be in a temp, because it has to stay live over y_code
1103 -- we could compare x_reg and y_reg and do something better here...
1104 (y_reg, y_code) <- getSomeReg index
1106 code = x_code `appOL` y_code
-- NOTE(review): this local 'base' (the scale factor) shadows the
-- 'base' parameter (the base expression) — consider renaming.  The
-- case is non-exhaustive; callers are expected to pass shift 0..3.
1107 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1108 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1114 -- -----------------------------------------------------------------------------
1115 -- getOperand: sometimes any operand will do.
1117 -- getNonClobberedOperand: the value of the operand will remain valid across
1118 -- the computation of an arbitrary expression, unless the expression
1119 -- is computed directly into a register which the operand refers to
1120 -- (see trivialCode where this function is used for an example).
-- | Produce an operand whose value stays valid across the computation of an
-- arbitrary expression (see the comment block above).  Literals become
-- immediates or rip-relative loads; loads keep their address mode unless the
-- address uses a clobberable register, in which case the address is saved
-- into a fresh temporary with LEA; anything else is forced into a
-- non-clobbered register.
1122 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1123 #if x86_64_TARGET_ARCH
-- Non-zero float literals are placed in a read-only data section and
-- addressed rip-relative (x86_64 only).
1124 getNonClobberedOperand (CmmLit lit)
1125 | isSuitableFloatingPointLit lit = do
1126 lbl <- getNewLabelNat
1127 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1129 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals can be immediates.
1131 getNonClobberedOperand (CmmLit lit)
1132 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
1133 return (OpImm (litToImm lit), nilOL)
1134 getNonClobberedOperand (CmmLoad mem pk)
1135 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1136 Amode src mem_code <- getAmode mem
-- If the amode mentions a real (clobberable) register, materialise the
-- address into a fresh temp so it survives later computations.
1138 if (amodeCouldBeClobbered src)
1140 tmp <- getNewRegNat archWordSize
1141 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1142 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1145 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: compute into a register that won't be clobbered.
1146 getNonClobberedOperand e = do
1147 (reg, code) <- getNonClobberedReg e
1148 return (OpReg reg, code)
-- | True if any register mentioned by the addressing mode could be
-- overwritten by an intervening computation (see 'regClobbered').
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode =
    or [ regClobbered r | r <- addrModeRegs amode ]
-- | A real machine register counts as clobberable when 'freeReg' says it
-- is available to the allocator; virtual registers never do.
regClobbered reg = case reg of
    RealReg rr -> isFastTrue (freeReg rr)
    _          -> False
1156 -- getOperand: the operand is not required to remain valid across the
1157 -- computation of an arbitrary expression.
-- | Produce any operand for the expression; unlike
-- 'getNonClobberedOperand' the result need not survive later computations.
1158 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1159 #if x86_64_TARGET_ARCH
-- Non-zero float literal: read-only data + rip-relative address (x86_64).
1160 getOperand (CmmLit lit)
1161 | isSuitableFloatingPointLit lit = do
1162 lbl <- getNewLabelNat
1163 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1165 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literal: immediate operand, no code.
1167 getOperand (CmmLit lit)
1168 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
1169 return (OpImm (litToImm lit), nilOL)
-- Load: use the address mode directly as a memory operand.
1170 getOperand (CmmLoad mem pk)
1171 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1172 Amode src mem_code <- getAmode mem
1173 return (OpAddr src, mem_code)
-- Fallback equation ('getOperand e = do' head appears elided in this listing):
-- compute into some register.
1175 (reg, code) <- getSomeReg e
1176 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (memory load, small immediate, or usable float literal)?
-- NOTE(review): the catch-all 'isOperand _ = False' equation appears to be
-- elided from this listing.
1178 isOperand :: CmmExpr -> Bool
1179 isOperand (CmmLoad _ _) = True
1180 isOperand (CmmLit lit) = is32BitLit lit
1181 || isSuitableFloatingPointLit lit
1184 -- if we want a floating-point literal as an operand, we can
1185 -- use it directly from memory. However, if the literal is
1186 -- zero, we're better off generating it into a register using
-- | A float literal may be used directly from memory as an operand —
-- except 0.0, which is cheaper to generate into a register (per the
-- comment above this function).
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
-- | Like 'getOperand' but restricted to a register or memory operand
-- (no immediates); the register case goes through 'getNonClobberedReg'.
1191 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1192 getRegOrMem (CmmLoad mem pk)
1193 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1194 Amode src mem_code <- getAmode mem
1195 return (OpAddr src, mem_code)
-- Fallback equation (the 'getRegOrMem e = do' head appears elided here).
1197 (reg, code) <- getNonClobberedReg e
1198 return (OpReg reg, code)
-- | On x86_64, a 64-bit integer literal only fits in an immediate field
-- if it is representable in 32 bits.  (Remaining equations, including the
-- non-x86_64 fallback, appear elided from this listing.)
1200 #if x86_64_TARGET_ARCH
1201 is32BitLit (CmmInt i W64) = is32BitInteger i
1202 -- assume that labels are in the range 0-2^31-1: this assumes the
1203 -- small memory model (see gcc docs, -mcmodel=small).
1210 -- Set up a condition code for a conditional branch.
-- | Turn a boolean-valued comparison expression into a 'CondCode':
-- instructions that set the flags, plus the condition to branch on.
-- Float comparisons dispatch to 'condFltCode', integer ones to
-- 'condIntCode'.  (The 'case mop of' scrutinee line appears elided here.)
1212 getCondCode :: CmmExpr -> NatM CondCode
1214 -- yes, they really do seem to want exactly the same!
1216 getCondCode (CmmMachOp mop [x, y])
1219 MO_F_Eq W32 -> condFltCode EQQ x y
1220 MO_F_Ne W32 -> condFltCode NE x y
1221 MO_F_Gt W32 -> condFltCode GTT x y
1222 MO_F_Ge W32 -> condFltCode GE x y
1223 MO_F_Lt W32 -> condFltCode LTT x y
1224 MO_F_Le W32 -> condFltCode LE x y
1226 MO_F_Eq W64 -> condFltCode EQQ x y
1227 MO_F_Ne W64 -> condFltCode NE x y
1228 MO_F_Gt W64 -> condFltCode GTT x y
1229 MO_F_Ge W64 -> condFltCode GE x y
1230 MO_F_Lt W64 -> condFltCode LTT x y
1231 MO_F_Le W64 -> condFltCode LE x y
-- Integer comparisons; width ('rep') is unused because the operand
-- width is recovered from the operands themselves in condIntCode.
1233 MO_Eq rep -> condIntCode EQQ x y
1234 MO_Ne rep -> condIntCode NE x y
1236 MO_S_Gt rep -> condIntCode GTT x y
1237 MO_S_Ge rep -> condIntCode GE x y
1238 MO_S_Lt rep -> condIntCode LTT x y
1239 MO_S_Le rep -> condIntCode LE x y
-- Unsigned comparisons use the unsigned condition codes.
1241 MO_U_Gt rep -> condIntCode GU x y
1242 MO_U_Ge rep -> condIntCode GEU x y
1243 MO_U_Lt rep -> condIntCode LU x y
1244 MO_U_Le rep -> condIntCode LEU x y
1246 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1248 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1253 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1254 -- passed back up the tree.
-- | Set the flags for an integer comparison, choosing progressively more
-- general instruction forms: memory-vs-immediate CMP, TEST against a mask,
-- TEST against self (compare with zero), register-vs-operand CMP, and
-- finally register-vs-reg/mem CMP.
-- NOTE(review): several 'let' keyword lines appear elided in this listing.
1256 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1258 -- memory vs immediate
1259 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1260 Amode x_addr x_code <- getAmode x
1263 code = x_code `snocOL`
1264 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1266 return (CondCode False cond code)
1268 -- anything vs zero, using a mask
1269 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0  ==>  TEST mask, x — avoids materialising the AND.
1270 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
1271 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
1273 (x_reg, x_code) <- getSomeReg x
1275 code = x_code `snocOL`
1276 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1278 return (CondCode False cond code)
-- anything vs zero: TEST x, x sets flags without an immediate.
1281 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1282 (x_reg, x_code) <- getSomeReg x
1284 code = x_code `snocOL`
1285 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1287 return (CondCode False cond code)
1289 -- anything vs operand
-- x must be non-clobbered so it survives the computation of y.
1290 condIntCode cond x y | isOperand y = do
1291 (x_reg, x_code) <- getNonClobberedReg x
1292 (y_op, y_code) <- getOperand y
1294 code = x_code `appOL` y_code `snocOL`
1295 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1297 return (CondCode False cond code)
1299 -- anything vs anything
1300 condIntCode cond x y = do
1301 (y_reg, y_code) <- getNonClobberedReg y
1302 (x_op, x_code) <- getRegOrMem x
1304 code = y_code `appOL`
1306 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1308 return (CondCode False cond code)
1312 --------------------------------------------------------------------------------
-- | Set the flags for a floating-point comparison.  On i386 this uses the
-- x87 GCMP pseudo-instruction; on x86_64 the SSE2 compare, whose result
-- must be read with the unsigned condition codes (see NB(1) below).
1313 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1315 #if i386_TARGET_ARCH
1316 condFltCode cond x y
1317 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1318 (x_reg, x_code) <- getNonClobberedReg x
1319 (y_reg, y_code) <- getSomeReg y
1321 code = x_code `appOL` y_code `snocOL`
1322 GCMP cond x_reg y_reg
1323 -- The GCMP insn does the test and sets the zero flag if comparable
1324 -- and true. Hence we always supply EQQ as the condition to test.
1325 return (CondCode True EQQ code)
1327 #elif x86_64_TARGET_ARCH
1328 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1329 -- an operand, but the right must be a reg. We can probably do better
1330 -- than this general case...
1331 condFltCode cond x y = do
1332 (x_reg, x_code) <- getNonClobberedReg x
1333 (y_op, y_code) <- getOperand y
1335 code = x_code `appOL`
1337 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1338 -- NB(1): we need to use the unsigned comparison operators on the
1339 -- result of this comparison.
1341 return (CondCode True (condToUnsigned cond) code)
-- Fallback for other target architectures.
1344 condFltCode = panic "X86.condFltCode: not defined"
1350 -- -----------------------------------------------------------------------------
1351 -- Generating assignments
1353 -- Assignments are really at the heart of the whole code generation
1354 -- business. Almost all top-level nodes of any real importance are
1355 -- assignments, which correspond to loads, stores, or register
1356 -- transfers. If we're really lucky, some of the register transfers
1357 -- will go away, because we can use the destination register to
1358 -- complete the code generation for the right hand side. This only
1359 -- fails when the right hand side is forced into a fixed register
1360 -- (e.g. the result of a call).
-- Type signatures for the four assignment code generators: integer and
-- floating-point, each to memory or to a register destination.
1362 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1363 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1365 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1366 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1369 -- integer assignment to memory
1371 -- specific case of adding/subtracting an integer to a particular address.
1372 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Pattern: mem[addr] := mem[addr] `op` imm  ==>  single ADD/SUB on memory.
-- The pk /= II64 || is32BitInteger i guard keeps the immediate encodable.
1374 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1375 CmmLit (CmmInt i _)])
1376 | addr == addr2, pk /= II64 || is32BitInteger i,
1377 Just instr <- check op
1378 = do Amode amode code_addr <- getAmode addr
1379 let code = code_addr `snocOL`
1380 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- 'check' maps the Cmm operator to the memory-capable instruction, if any.
-- (The 'return code' and 'where' lines appear elided from this listing.)
1383 check (MO_Add _) = Just ADD
1384 check (MO_Sub _) = Just SUB
-- General case: compute the source into a register or immediate first,
-- then MOV it to the destination address.
1389 assignMem_IntCode pk addr src = do
1390 Amode addr code_addr <- getAmode addr
1391 (code_src, op_src) <- get_op_RI src
1393 code = code_src `appOL`
1395 MOV pk op_src (OpAddr addr)
1396 -- NOTE: op_src is stable, so it will still be valid
1397 -- after code_addr. This may involve the introduction
1398 -- of an extra MOV to a temporary register, but we hope
1399 -- the register allocator will get rid of it.
-- | Get an operand that is either a 32-bit immediate or a non-clobbered
-- register ("RI" = register/immediate).  Used by assignMem_IntCode above.
-- (The second equation's head 'get_op_RI op' appears elided here.)
1403 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1404 get_op_RI (CmmLit lit) | is32BitLit lit
1405 = return (nilOL, OpImm (litToImm lit))
1407 = do (reg,code) <- getNonClobberedReg op
1408 return (code, OpReg reg)
-- Integer assignment into a register destination.
-- A memory load goes via intLoadCode; any other source is computed
-- straight into the destination register with getAnyReg.
assignReg_IntCode pk reg (CmmLoad src _) =
    do  mk_load <- intLoadCode (MOV pk) src
        let dst = getRegisterReg reg
        return (mk_load dst)
assignReg_IntCode pk reg src =
    do  mk_code <- getAnyReg src
        let dst = getRegisterReg reg
        return (mk_code dst)
1422 -- Floating point assignment to memory
-- Compute the source into a (non-clobbered) register, then store it:
-- x87 GST on i386, plain MOV of an xmm register on x86_64.
1423 assignMem_FltCode pk addr src = do
1424 (src_reg, src_code) <- getNonClobberedReg src
1425 Amode addr addr_code <- getAmode addr
1427 code = src_code `appOL`
1429 IF_ARCH_i386(GST pk src_reg addr,
1430 MOV pk (OpReg src_reg) (OpAddr addr))
-- Floating point assignment to a register/temporary: generate the source
-- expression directly into the destination register.
assignReg_FltCode pk reg src =
    do  mk_code <- getAnyReg src
        return (mk_code (getRegisterReg reg))
-- | An unconditional jump to a computed target: through memory, to an
-- immediate label, or via a register.
-- (The last equation's head 'genJump expr = do' appears elided here.)
1439 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1441 genJump (CmmLoad mem pk) = do
1442 Amode target code <- getAmode mem
1443 return (code `snocOL` JMP (OpAddr target))
1445 genJump (CmmLit lit) = do
1446 return (unitOL (JMP (OpImm (litToImm lit))))
1449 (reg,code) <- getSomeReg expr
1450 return (code `snocOL` JMP (OpReg reg))
1453 -- -----------------------------------------------------------------------------
1454 -- Unconditional branches
-- | An unconditional branch to a local label: just the jump
-- instruction(s) for the target, as an instruction block.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkJumpInstr target))
1461 -- -----------------------------------------------------------------------------
1462 -- Conditional jumps
1465 Conditional jumps are always to local labels, so we can use branch
1466 instructions. We peek at the arguments to decide what kind of
1469 I386: First, we have to ensure that the condition
1470 codes are set according to the supplied comparison operation.
-- genCondJump's signature; the 'genCondJump ::' name line and the
-- '-> NatM InstrBlock' result line appear elided from this listing.
1474 :: BlockId -- the branch target
1475 -> CmmExpr -- the condition on which to branch
1478 #if i386_TARGET_ARCH
-- i386: set flags via getCondCode, then a single conditional jump.
1479 genCondJump id bool = do
1480 CondCode _ cond code <- getCondCode bool
1481 return (code `snocOL` JXX cond id)
1483 #elif x86_64_TARGET_ARCH
-- x86_64: integer conditions are a single JXX; float conditions need the
-- parity-flag dance for unordered (NaN) operands — see condFltReg.
1484 genCondJump id bool = do
1485 CondCode is_float cond cond_code <- getCondCode bool
1488 return (cond_code `snocOL` JXX cond id)
1490 lbl <- getBlockIdNat
1492 -- see comment with condFltReg
1493 let code = case cond of
1499 plain_test = unitOL (
1502 or_unordered = toOL [
1506 and_ordered = toOL [
1512 return (cond_code `appOL` code)
-- Fallback for other target architectures.
1515 genCondJump = panic "X86.genCondJump: not defined"
1522 -- -----------------------------------------------------------------------------
1523 -- Generating C calls
1525 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1526 -- @get_arg@, which moves the arguments to the correct registers/stack
1527 -- locations. Apart from that, the code is easy.
1529 -- (If applicable) Do not fill the delay slots here; you will confuse the
1530 -- register allocator.
-- genCCall's type signature; the 'genCCall ::' name line and the
-- '-> NatM InstrBlock' result line appear elided from this listing.
1533 :: CmmCallTarget -- function to call
1534 -> HintedCmmFormals -- where to put the result
1535 -> HintedCmmActuals -- arguments (of mixed type)
1538 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 C calls: arguments are pushed on the stack right-to-left; results
-- come back in %st(0) (floats), %eax/%edx (64-bit), or %eax.  Darwin pads
-- the argument area so %esp stays 16-byte aligned at the call.
1540 #if i386_TARGET_ARCH
1542 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1543 -- write barrier compiles to no code on x86/x86-64;
1544 -- we keep it this long in order to prevent earlier optimisations.
1546 -- we only cope with a single result for foreign calls
-- Float primitives that have x87 instructions are inlined; the rest go
-- out of line via outOfLineFloatOp.
1547 genCCall (CmmPrim op) [CmmHinted r _] args = do
1548 l1 <- getNewLabelNat
1549 l2 <- getNewLabelNat
1551 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1552 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1554 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1555 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1557 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1558 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1560 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1561 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1563 other_op -> outOfLineFloatOp op r args
1565 actuallyInlineFloatOp instr size [CmmHinted x _]
1566 = do res <- trivialUFCode size (instr size) x
-- NOTE(review): an 'any <- anyReg res' binding appears elided above;
-- 'any' here is that binding, not Prelude.any.
1568 return (any (getRegisterReg (CmmLocal r)))
-- The general call case.
1570 genCCall target dest_regs args = do
1572 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1573 #if !darwin_TARGET_OS
1574 tot_arg_size = sum sizes
-- Darwin: round the argument area up to 16 bytes for stack alignment.
1576 raw_arg_size = sum sizes
1577 tot_arg_size = roundTo 16 raw_arg_size
1578 arg_pad_size = tot_arg_size - raw_arg_size
1579 delta0 <- getDeltaNat
1580 setDeltaNat (delta0 - arg_pad_size)
1583 push_codes <- mapM push_arg (reverse args)
1584 delta <- getDeltaNat
1587 -- deal with static vs dynamic call targets
1588 (callinsns,cconv) <-
1591 CmmCallee (CmmLit (CmmLabel lbl)) conv
1592 -> -- ToDo: stdcall arg sizes
1593 return (unitOL (CALL (Left fn_imm) []), conv)
1594 where fn_imm = ImmCLbl lbl
1596 -> do { (dyn_c, dyn_r) <- get_op expr
1597 ; ASSERT( isWord32 (cmmExprType expr) )
1598 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1601 #if darwin_TARGET_OS
-- Emit the alignment padding before the pushed arguments (Darwin only).
1603 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1604 DELTA (delta0 - arg_pad_size)]
1605 `appOL` concatOL push_codes
1608 = concatOL push_codes
1609 call = callinsns `appOL`
1611 -- Deallocate parameters after call for ccall;
1612 -- but not for stdcall (callee does it)
1613 (if cconv == StdCallConv || tot_arg_size==0 then [] else
1614 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
1616 [DELTA (delta + tot_arg_size)]
1619 setDeltaNat (delta + tot_arg_size)
1622 -- assign the results, if necessary
1623 assign_code [] = nilOL
1624 assign_code [CmmHinted dest _hint]
1625 | isFloatType ty = unitOL (GMOV fake0 r_dest)
1626 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1627 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1628 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1630 ty = localRegType dest
1632 r_dest_hi = getHiVRegFromLo r_dest
1633 r_dest = getRegisterReg (CmmLocal dest)
1634 assign_code many = panic "genCCall.assign_code many"
1636 return (push_code `appOL`
1638 assign_code dest_regs)
1641 arg_size :: CmmType -> Int -- Width in bytes
1642 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a.
1644 roundTo a x | x `mod` a == 0 = x
1645 | otherwise = x + a - (x `mod` a)
-- Push one argument, updating the stack delta as we go.
1648 push_arg :: HintedCmmActual {-current argument-}
1649 -> NatM InstrBlock -- code
1651 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
-- 64-bit arguments are pushed as two 32-bit words, high word first.
1652 | isWord64 arg_ty = do
1653 ChildCode64 code r_lo <- iselExpr64 arg
1654 delta <- getDeltaNat
1655 setDeltaNat (delta - 8)
1657 r_hi = getHiVRegFromLo r_lo
1659 return ( code `appOL`
1660 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1661 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1666 (code, reg) <- get_op arg
1667 delta <- getDeltaNat
1668 let size = arg_size arg_ty -- Byte size
1669 setDeltaNat (delta-size)
-- Floats can't be PUSHed: grow the stack, then x87-store into the slot.
1670 if (isFloatType arg_ty)
1671 then return (code `appOL`
1672 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1674 GST (floatSize (typeWidth arg_ty))
1675 reg (AddrBaseIndex (EABaseReg esp)
1679 else return (code `snocOL`
1680 PUSH II32 (OpReg reg) `snocOL`
1684 arg_ty = cmmExprType arg
1687 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
1689 (reg,code) <- getSomeReg op
-- x86_64 C calls (System V AMD64 ABI): the first several integer args go
-- in registers (allArgRegs), floats in SSE registers (allFPArgRegs); the
-- overflow goes on the stack.  %al must hold the number of SSE registers
-- used, in case the callee is varargs.
1692 #elif x86_64_TARGET_ARCH
1694 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1695 -- write barrier compiles to no code on x86/x86-64;
1696 -- we keep it this long in order to prevent earlier optimisations.
-- No inline float primitives on x86_64; everything goes out of line.
1699 genCCall (CmmPrim op) [CmmHinted r _] args =
1700 outOfLineFloatOp op r args
1702 genCCall target dest_regs args = do
1704 -- load up the register arguments
1705 (stack_args, aregs, fregs, load_args_code)
1706 <- load_args args allArgRegs allFPArgRegs nilOL
-- The registers actually consumed are the prefix of each list that
-- load_args did NOT return as still-available.
1709 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1710 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1711 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1712 -- for annotating the call instruction with
1714 sse_regs = length fp_regs_used
1716 tot_arg_size = arg_size * length stack_args
1718 -- On entry to the called function, %rsp should be aligned
1719 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1720 -- the return address is 16-byte aligned). In STG land
1721 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1722 -- need to make sure we push a multiple of 16-bytes of args,
1723 -- plus the return address, to get the correct alignment.
1724 -- Urg, this is hard. We need to feed the delta back into
1725 -- the arg pushing code.
1726 (real_size, adjust_rsp) <-
1727 if tot_arg_size `rem` 16 == 0
1728 then return (tot_arg_size, nilOL)
1729 else do -- we need to adjust...
1730 delta <- getDeltaNat
1731 setDeltaNat (delta-8)
1732 return (tot_arg_size+8, toOL [
1733 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1737 -- push the stack args, right to left
1738 push_code <- push_args (reverse stack_args) nilOL
1739 delta <- getDeltaNat
1741 -- deal with static vs dynamic call targets
1742 (callinsns,cconv) <-
1745 CmmCallee (CmmLit (CmmLabel lbl)) conv
1746 -> -- ToDo: stdcall arg sizes
1747 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1748 where fn_imm = ImmCLbl lbl
1750 -> do (dyn_r, dyn_c) <- getSomeReg expr
1751 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1754 -- The x86_64 ABI requires us to set %al to the number of SSE
1755 -- registers that contain arguments, if the called routine
1756 -- is a varargs function. We don't know whether it's a
1757 -- varargs function or not, so we have to assume it is.
1759 -- It's not safe to omit this assignment, even if the number
1760 -- of SSE regs in use is zero. If %al is larger than 8
1761 -- on entry to a varargs function, seg faults ensue.
1762 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1764 let call = callinsns `appOL`
1766 -- Deallocate parameters after call for ccall;
1767 -- but not for stdcall (callee does it)
1768 (if cconv == StdCallConv || real_size==0 then [] else
1769 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1771 [DELTA (delta + real_size)]
1774 setDeltaNat (delta + real_size)
1777 -- assign the results, if necessary
1778 assign_code [] = nilOL
-- Results: floats in %xmm0, everything else in %rax.
1779 assign_code [CmmHinted dest _hint] =
1780 case typeWidth rep of
1781 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1782 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1783 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1785 rep = localRegType dest
1786 r_dest = getRegisterReg (CmmLocal dest)
1787 assign_code many = panic "genCCall.assign_code many"
1789 return (load_args_code `appOL`
1792 assign_eax sse_regs `appOL`
1794 assign_code dest_regs)
1797 arg_size = 8 -- always, at the mo
-- Distribute arguments into int regs, FP regs, and stack leftovers.
1799 load_args :: [CmmHinted CmmExpr]
1800 -> [Reg] -- int regs avail for args
1801 -> [Reg] -- FP regs avail for args
1803 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1804 load_args args [] [] code = return (args, [], [], code)
1805 -- no more regs to use
1806 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1807 -- no more args to push
1808 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1809 | isFloatType arg_rep =
1813 arg_code <- getAnyReg arg
1814 load_args rest aregs rs (code `appOL` arg_code r)
1819 arg_code <- getAnyReg arg
1820 load_args rest rs fregs (code `appOL` arg_code r)
1822 arg_rep = cmmExprType arg
1825 (args',ars,frs,code') <- load_args rest aregs fregs code
1826 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push the remaining (stack) args, right to left, tracking the delta.
1828 push_args [] code = return code
1829 push_args ((CmmHinted arg hint):rest) code
-- Floats can't be PUSHed: grow the stack and MOV into the new slot.
1830 | isFloatType arg_rep = do
1831 (arg_reg, arg_code) <- getSomeReg arg
1832 delta <- getDeltaNat
1833 setDeltaNat (delta-arg_size)
1834 let code' = code `appOL` arg_code `appOL` toOL [
1835 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1836 DELTA (delta-arg_size),
1837 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1838 push_args rest code'
1841 -- we only ever generate word-sized function arguments. Promotion
1842 -- has already happened: our Int8# type is kept sign-extended
1843 -- in an Int#, for example.
1844 ASSERT(width == W64) return ()
1845 (arg_op, arg_code) <- getOperand arg
1846 delta <- getDeltaNat
1847 setDeltaNat (delta-arg_size)
1848 let code' = code `appOL` arg_code `appOL` toOL [
1850 DELTA (delta-arg_size)]
1851 push_args rest code'
1853 arg_rep = cmmExprType arg
1854 width = typeWidth arg_rep
-- Fallback for unsupported target architectures.
-- Fix: the panic message previously said "genCCAll" (typo).
genCCall = panic "X86.genCCall: not defined"
1859 #endif /* x86_64_TARGET_ARCH */
-- | Implement a floating-point CallishMachOp by an unsafe C call to the
-- corresponding libm routine.  Float32 results go via a fresh Float64
-- temporary (the C functions here are called through the F64 convention
-- in that branch) and are then assigned to the real result register.
1864 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
1865 outOfLineFloatOp mop res args
1867 dflags <- getDynFlagsNat
1868 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1869 let target = CmmCallee targetExpr CCallConv
1871 if isFloat64 (localRegType res)
1873 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
1877 tmp = LocalReg uq f64
1879 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
1880 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
1881 return (code1 `appOL` code2)
-- Map each machop to the name of its C library routine.
1883 lbl = mkForeignLabel fn Nothing False IsFunction
1886 MO_F32_Sqrt -> fsLit "sqrtf"
1887 MO_F32_Sin -> fsLit "sinf"
1888 MO_F32_Cos -> fsLit "cosf"
1889 MO_F32_Tan -> fsLit "tanf"
1890 MO_F32_Exp -> fsLit "expf"
1891 MO_F32_Log -> fsLit "logf"
1893 MO_F32_Asin -> fsLit "asinf"
1894 MO_F32_Acos -> fsLit "acosf"
1895 MO_F32_Atan -> fsLit "atanf"
1897 MO_F32_Sinh -> fsLit "sinhf"
1898 MO_F32_Cosh -> fsLit "coshf"
1899 MO_F32_Tanh -> fsLit "tanhf"
1900 MO_F32_Pwr -> fsLit "powf"
1902 MO_F64_Sqrt -> fsLit "sqrt"
1903 MO_F64_Sin -> fsLit "sin"
1904 MO_F64_Cos -> fsLit "cos"
1905 MO_F64_Tan -> fsLit "tan"
1906 MO_F64_Exp -> fsLit "exp"
1907 MO_F64_Log -> fsLit "log"
1909 MO_F64_Asin -> fsLit "asin"
1910 MO_F64_Acos -> fsLit "acos"
1911 MO_F64_Atan -> fsLit "atan"
1913 MO_F64_Sinh -> fsLit "sinh"
1914 MO_F64_Cosh -> fsLit "cosh"
1915 MO_F64_Tanh -> fsLit "tanh"
1916 MO_F64_Pwr -> fsLit "pow"
1922 -- -----------------------------------------------------------------------------
1923 -- Generating a table-branch
-- | Generate a table jump.  With -fPIC the table holds label differences
-- and the entry is added to the table base; otherwise the table holds
-- absolute labels and we jump through it directly.  Nothing entries
-- (impossible branches) become zero words.
1925 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-- PIC case (equation head with the opt_PIC guard appears elided here).
1930 (reg,e_code) <- getSomeReg expr
1931 lbl <- getNewLabelNat
1932 dflags <- getDynFlagsNat
1933 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1934 (tableReg,t_code) <- getSomeReg $ dynRef
1936 jumpTable = map jumpTableEntryRel ids
1938 jumpTableEntryRel Nothing
1939 = CmmStaticLit (CmmInt 0 wordWidth)
1940 jumpTableEntryRel (Just (BlockId id))
1941 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1942 where blockLabel = mkAsmTempLabel id
-- Table entry operand: table base + index * word size.
1944 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1945 (EAIndex reg wORD_SIZE) (ImmInt 0))
1947 #if x86_64_TARGET_ARCH
1948 #if darwin_TARGET_OS
1949 -- on Mac OS X/x86_64, put the jump table in the text section
1950 -- to work around a limitation of the linker.
1951 -- ld64 is unable to handle the relocations for
1953 -- if L0 is not preceded by a non-anonymous label in its section.
1955 code = e_code `appOL` t_code `appOL` toOL [
1956 ADD (intSize wordWidth) op (OpReg tableReg),
1957 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
1958 LDATA Text (CmmDataLabel lbl : jumpTable)
1961 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1962 -- relocations, hence we only get 32-bit offsets in the jump
1963 -- table. As these offsets are always negative we need to properly
1964 -- sign extend them to 64-bit. This hack should be removed in
1965 -- conjunction with the hack in PprMach.hs/pprDataItem once
1966 -- binutils 2.17 is standard.
1967 code = e_code `appOL` t_code `appOL` toOL [
1968 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1970 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1971 (EAIndex reg wORD_SIZE) (ImmInt 0)))
1973 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1974 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-Darwin, non-hack PIC case.
1978 code = e_code `appOL` t_code `appOL` toOL [
1979 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1980 ADD (intSize wordWidth) op (OpReg tableReg),
1981 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC case: absolute jump table, indexed jump through it.
1987 (reg,e_code) <- getSomeReg expr
1988 lbl <- getNewLabelNat
1990 jumpTable = map jumpTableEntry ids
1991 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1992 code = e_code `appOL` toOL [
1993 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1994 JMP_TBL op [ id | Just id <- ids ]
2000 -- -----------------------------------------------------------------------------
2001 -- 'condIntReg' and 'condFltReg': condition codes into registers
2003 -- Turn those condition codes into integers now (when they appear on
2004 -- the right hand side of an assignment).
2006 -- (If applicable) Do not fill the delay slots here; you will confuse the
2007 -- register allocator.
-- | Materialise an integer comparison result as 0/1 in a register:
-- set flags, SETcc into a byte temp, then zero-extend into the
-- destination.  (A 'let' line and closing bracket appear elided here.)
2009 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2011 condIntReg cond x y = do
2012 CondCode _ cond cond_code <- condIntCode cond x y
2013 tmp <- getNewRegNat II8
2015 code dst = cond_code `appOL` toOL [
2016 SETCC cond (OpReg tmp),
2017 MOVZxL II8 (OpReg tmp) (OpReg dst)
2020 return (Any II32 code)
-- | Materialise a floating-point comparison result as 0/1 in a register.
-- The x86_64 version must also account for unordered (NaN) operands via
-- the parity flag — see the long comment below.
2024 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2026 #if i386_TARGET_ARCH
-- i386 (x87): GCMP already folded the condition, so SETcc + zero-extend.
2027 condFltReg cond x y = do
2028 CondCode _ cond cond_code <- condFltCode cond x y
2029 tmp <- getNewRegNat II8
2031 code dst = cond_code `appOL` toOL [
2032 SETCC cond (OpReg tmp),
2033 MOVZxL II8 (OpReg tmp) (OpReg dst)
2036 return (Any II32 code)
2038 #elif x86_64_TARGET_ARCH
2039 condFltReg cond x y = do
2040 CondCode _ cond cond_code <- condFltCode cond x y
2041 tmp1 <- getNewRegNat archWordSize
2042 tmp2 <- getNewRegNat archWordSize
2044 -- We have to worry about unordered operands (eg. comparisons
2045 -- against NaN). If the operands are unordered, the comparison
2046 -- sets the parity flag, carry flag and zero flag.
2047 -- All comparisons are supposed to return false for unordered
2048 -- operands except for !=, which returns true.
2050 -- Optimisation: we don't have to test the parity flag if we
2051 -- know the test has already excluded the unordered case: eg >
2052 -- and >= test for a zero carry flag, which can only occur for
2053 -- ordered operands.
2055 -- ToDo: by reversing comparisons we could avoid testing the
2056 -- parity flag in more cases.
-- Dispatch on the condition ('let code dst = ... case cond of' lines
-- appear elided in this listing).
2061 NE -> or_unordered dst
2062 GU -> plain_test dst
2063 GEU -> plain_test dst
2064 _ -> and_ordered dst)
2066 plain_test dst = toOL [
2067 SETCC cond (OpReg tmp1),
2068 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE is true when unordered: OR in the parity flag.
2070 or_unordered dst = toOL [
2071 SETCC cond (OpReg tmp1),
2072 SETCC PARITY (OpReg tmp2),
2073 OR II8 (OpReg tmp1) (OpReg tmp2),
2074 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other conditions are false when unordered: AND with NOT parity.
2076 and_ordered dst = toOL [
2077 SETCC cond (OpReg tmp1),
2078 SETCC NOTPARITY (OpReg tmp2),
2079 AND II8 (OpReg tmp1) (OpReg tmp2),
2080 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2083 return (Any II32 code)
-- Fallback for other target architectures.
2086 condFltReg = panic "X86.condFltReg: not defined"
2093 -- -----------------------------------------------------------------------------
2094 -- 'trivial*Code': deal with trivial instructions
2096 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2097 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2098 -- Only look for constants on the right hand side, because that's
2099 -- where the generic optimizer will have put them.
2101 -- Similarly, for unary instructions, we don't have to worry about
2102 -- matching an StInt as the argument, because genericOpt will already
2103 -- have handled the constant-folding.
2107 The Rules of the Game are:
2109 * You cannot assume anything about the destination register dst;
2110 it may be anything, including a fixed reg.
2112 * You may compute an operand into a fixed reg, but you may not
2113 subsequently change the contents of that fixed reg. If you
2114 want to do so, first copy the value either to a temporary
2115 or into dst. You are free to modify dst even if it happens
2116 to be a fixed reg -- that's not your problem.
2118 * You cannot assume that a fixed reg will stay live over an
2119 arbitrary computation. The same applies to the dst reg.
2121 * Temporary regs obtained from getNewRegNat are distinct from
2122 each other and from all other regs, and stay live over
2123 arbitrary computations.
2125 --------------------
2127 SDM's version of The Rules:
2129 * If getRegister returns Any, that means it can generate correct
2130 code which places the result in any register, period. Even if that
2131 register happens to be read during the computation.
2133 Corollary #1: this means that if you are generating code for an
2134 operation with two arbitrary operands, you cannot assign the result
2135 of the first operand into the destination register before computing
2136 the second operand. The second operand might require the old value
2137 of the destination register.
2139 Corollary #2: A function might be able to generate more efficient
2140 code if it knows the destination register is a new temporary (and
2141 therefore not read by any of the sub-computations).
2143 * If getRegister returns Any, then the code it generates may modify only:
2144 (a) fresh temporaries
2145 (b) the destination register
2146 (c) known registers (eg. %ecx is used by shifts)
2147 In particular, it may *not* modify global registers, unless the global
2148 register happens to be the destination register.
-- | Dyadic integer instruction with a 32-bit literal on the LEFT: if a
-- reversed form of the instruction exists, compute the right operand into
-- the destination and apply the reversed instruction with the literal as
-- immediate.  Otherwise fall through to the general 'genTrivialCode'.
-- ('let code dst' lines appear elided from this listing.)
2151 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
2152 | is32BitLit lit_a = do
2153 b_code <- getAnyReg b
2156 = b_code dst `snocOL`
2157 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2159 return (Any (intSize width) code)
2161 trivialCode width instr maybe_revinstr a b
2162 = genTrivialCode (intSize width) instr a b
2164 -- This is re-used for floating pt instructions too.
-- | General two-operand instruction (dst := dst `op` src): get b as a
-- non-clobbered operand, compute a straight into dst, then apply the
-- instruction.  If dst would clash with b's operand, save b into a fresh
-- temp first (see the comment below).
2165 genTrivialCode rep instr a b = do
2166 (b_op, b_code) <- getNonClobberedOperand b
2167 a_code <- getAnyReg a
2168 tmp <- getNewRegNat rep
2170 -- We want the value of b to stay alive across the computation of a.
2171 -- But, we want to calculate a straight into the destination register,
2172 -- because the instruction only has two operands (dst := dst `op` src).
2173 -- The troublesome case is when the result of b is in the same register
2174 -- as the destination reg. In this case, we have to save b in a
2175 -- new temporary across the computation of a.
-- ('code dst' definition lines appear partially elided in this listing.)
2177 | dst `regClashesWithOp` b_op =
2179 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2181 instr (OpReg tmp) (OpReg dst)
2185 instr b_op (OpReg dst)
2187 return (Any rep code)
-- | Does the register overlap the operand — either it IS the operand
-- register, or it appears in the operand's addressing mode?
reg `regClashesWithOp` op = case op of
    OpReg reg2   -> reg == reg2
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- | Unary instruction: compute the operand into the destination and apply
-- the instruction in place.  ('code dst' lines appear elided here.)
2195 trivialUCode rep instr x = do
2196 x_code <- getAnyReg x
2201 return (Any rep code)
2205 #if i386_TARGET_ARCH
-- i386 (x87): three-address float instruction on two source registers
-- into the destination.  ('let'/'code dst' lines appear elided here.)
2207 trivialFCode width instr x y = do
2208 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2209 (y_reg, y_code) <- getSomeReg y
2211 size = floatSize width
2215 instr size x_reg y_reg dst
2216 return (Any size code)
#if x86_64_TARGET_ARCH
-- x86_64 (SSE2): a dyadic float op is just an ordinary two-operand
-- instruction at the appropriate float size.
trivialFCode pk instr x y = genTrivialCode fsize (instr fsize) x y
  where fsize = floatSize pk
-- | Unary floating-point instruction from a source register into the
-- destination.  ('code dst' lines appear elided from this listing.)
2226 trivialUFCode size instr x = do
2227 (x_reg, x_code) <- getSomeReg x
2233 return (Any size code)
2236 --------------------------------------------------------------------------------
-- | Int -> float conversion: x87 GITOF/GITOD on i386, SSE CVTSI2SS/SD on
-- x86_64.  The result register has the target float size.
-- ('let' lines appear elided from this listing.)
2237 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2239 #if i386_TARGET_ARCH
2240 coerceInt2FP from to x = do
2241 (x_reg, x_code) <- getSomeReg x
2243 opc = case to of W32 -> GITOF; W64 -> GITOD
2244 code dst = x_code `snocOL` opc x_reg dst
2245 -- ToDo: works for non-II32 reps?
2246 return (Any (floatSize to) code)
2248 #elif x86_64_TARGET_ARCH
2249 coerceInt2FP from to x = do
2250 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2252 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2253 code dst = x_code `snocOL` opc x_op dst
2255 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- Fallback for other target architectures.
2258 coerceInt2FP = panic "X86.coerceInt2FP: not defined"
2265 --------------------------------------------------------------------------------
-- | Float -> int conversion (truncating): x87 GFTOI/GDTOI on i386, SSE
-- CVTTSS2SIQ/CVTTSD2SIQ on x86_64.  Dispatches on the SOURCE width.
-- ('let' lines appear elided from this listing.)
2266 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2268 #if i386_TARGET_ARCH
2269 coerceFP2Int from to x = do
2270 (x_reg, x_code) <- getSomeReg x
2272 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2273 code dst = x_code `snocOL` opc x_reg dst
2274 -- ToDo: works for non-II32 reps?
2276 return (Any (intSize to) code)
2278 #elif x86_64_TARGET_ARCH
2279 coerceFP2Int from to x = do
2280 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2282 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2283 code dst = x_code `snocOL` opc x_op dst
2285 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- Fallback for other target architectures.
2288 coerceFP2Int = panic "X86.coerceFP2Int: not defined"
2295 --------------------------------------------------------------------------------
-- | Float width conversion on x86_64: converting TO W32 is a
-- double->single CVTSD2SS; TO W64 is single->double CVTSS2SD.
-- ('let' line and non-x86_64 branch head appear elided here.)
2296 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2298 #if x86_64_TARGET_ARCH
2299 coerceFP2FP to x = do
2300 (x_reg, x_code) <- getSomeReg x
2302 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2303 code dst = x_code `snocOL` opc x_reg dst
2305 return (Any (floatSize to) code)
-- Fallback for other target architectures.
2308 coerceFP2FP = panic "X86.coerceFP2FP: not defined"