ghc/compiler/nativeGen/MachMisc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[MachMisc]{Description of various machine-specific things}
   5
   6 \begin{code}
   7 #include "nativeGen/NCG.h"
   8
   9 module MachMisc (
  10
  11         primRepToSize,
  12
  13         eXTRA_STK_ARGS_HERE,
  14
  15         volatileSaves, volatileRestores,
  16
  17         targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
  18
  19         underscorePrefix,
  20         fmtAsmLbl,
  21         exactLog2,
  22
  23         Instr(..),  IF_ARCH_i386(Operand(..) COMMA,)
  24         Cond(..),
  25         Size(..),
  26         IF_ARCH_i386(i386_insert_ffrees COMMA,)
  27
  28 #if alpha_TARGET_ARCH
  29         , RI(..)
  30 #endif
  31 #if i386_TARGET_ARCH
  32 #endif
  33 #if sparc_TARGET_ARCH
  34         RI(..), riZero, fpRelEA, moveSp, fPair
  35 #endif
  36     ) where
  37
  38 #include "HsVersions.h"
  39 #include "../includes/config.h"
  40
  41 import AbsCSyn          ( MagicId(..) )
  42 import AbsCUtils        ( magicIdPrimRep )
  43 import CLabel           ( CLabel, isAsmTemp )
  44 import Literal          ( mkMachInt, Literal(..) )
  45 import MachRegs         ( callerSaves,
  46                           get_MagicId_addr, get_MagicId_reg_or_addr,
  47                           Imm(..), Reg(..), MachRegsAddr(..)
  48 #                         if sparc_TARGET_ARCH
  49                           ,fp, sp
  50 #                         endif
  51                         )
  52 import PrimRep          ( PrimRep(..) )
  53 import Stix             ( StixStmt(..), StixExpr(..), StixReg(..),
  54                           CodeSegment, DestInfo(..) )
  55 import Panic            ( panic )
  56 import GlaExts          ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
  57 import Outputable       ( pprPanic, ppr, showSDoc )
  58 import IOExts           ( trace )
  59 import Config           ( cLeadingUnderscore )
  60 import FastTypes
  61
  62 import Maybe            ( catMaybes )
  63 \end{code}
  64
  65 \begin{code}
  66 underscorePrefix :: Bool   -- True <=> assembler labels get a leading underscore
  67 underscorePrefix = case cLeadingUnderscore of { "YES" -> True; _ -> False }
  68
  69 ---------------------------
  70 fmtAsmLbl :: String -> String  -- for formatting labels
  71
  72 fmtAsmLbl lbl
  73   = tmp_prefix ++ lbl
  74   where
  75      {- The alpha assembler likes temporary labels to look like $L123
  76         instead of L123.  (Don't toss the L, because then Lf28
  77         turns into $f28.)  Every other target gets a ".L" prefix.
  78      -}
  79      tmp_prefix = IF_ARCH_alpha( "$"
  80                                , {-otherwise-}
  81                                  ".L" )
  82 \end{code}
  83
  84 % ----------------------------------------------------------------
  85
  86 We (allegedly) put the first six C-call arguments in registers;
  87 where do we start putting the rest of them?
  88 \begin{code}
  89 eXTRA_STK_ARGS_HERE :: Int   -- NOTE(review): i386 says 23 but its comment says 6x4 (= 24) -- confirm
  90 eXTRA_STK_ARGS_HERE
  91   = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
  92 \end{code}
  93
  94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  95
  96 Now the volatile saves and restores.  We add the basic guys to the list of
  97 ``user'' registers provided.  Note that there are more basic registers on the
  98 restore list, because some are reloaded from constants.  (As the code stands,
  99 @restore_cands@ is simply @save_cands@, and neither list is consulted.)
 100
 101 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
 102
 103 \begin{code}
 104 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt]
 105
 106 volatileSaves    = volatileSavesOrRestores True    -- regs to memory
 107 volatileRestores = volatileSavesOrRestores False   -- memory to regs
 108
 109 -- NOTE(review): neither candidate list is referenced elsewhere in this file.
 110 save_cands    = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
 111 restore_cands = save_cands
 112
 113 -- Emit one transfer statement for each of the given MagicIds that is
 114 -- caller-saves and lives in a real register on this target.
 115 volatileSavesOrRestores do_saves vols
 116    = catMaybes [ transfer mid | mid <- vols ]
 117      where
 118         transfer mid
 119            | callerSaves mid
 120            = case get_MagicId_reg_or_addr mid of
 121                 -- It's caller-saves, and in a real reg:
 122                 -- we have to cook up some transfer code.
 123                 Left (RealReg _) -> Just move
 124                 -- If stored in BaseReg, we ain't interested
 125                 Right _          -> Nothing
 126            | otherwise   -- not caller-saves, so nothing to do
 127            = Nothing
 128            where
 129               {- Note that the use of (StixMagicId mid) here is a bit subtle.
 130                  Here, we only create those for MagicIds which are stored in
 131                  a real reg on this arch -- the case on the result of
 132                  get_MagicId_reg_or_addr guarantees this.  Later, when
 133                  selecting insns, that means these assignments are sure to turn
 134                  into real reg-to-mem or mem-to-reg moves, rather than being
 135                  pointless moves from some address in the reg-table
 136                  back to itself.-}
 137               move
 138                  | do_saves  = StAssignMem rep slot (StReg (StixMagicId mid))
 139                  | otherwise = StAssignReg rep (StixMagicId mid) (StInd rep slot)
 140               -- rep: the MagicId's PrimRep; slot: the address it is saved to / reloaded from
 141               rep  = magicIdPrimRep mid
 142               slot = get_MagicId_addr mid
 143 \end{code}
 144
 145 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 146
 147 Obviously slightly weedy
 148 (Note that the floating point values aren't terribly important.)
 149 ToDo: Fix!(JSM)
 150 \begin{code}
 151 targetMinDouble = MachDouble (negate 1.7976931348623157e+308)
 152 targetMaxDouble = MachDouble 1.7976931348623157e+308
 153 targetMinInt = mkMachInt (negate 2147483648)  -- -(2^31)
 154 targetMaxInt = mkMachInt 2147483647           -- 2^31 - 1
 155 \end{code}
 156
 157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 158
 159 This algorithm for determining the $\log_2$ of exact powers of 2 comes
 160 from GCC.  It requires bit manipulation primitives, and we use GHC
 161 extensions.  Tough.
 162
 163 \begin{code}
 164 w2i x = word2Int# x
 165 i2w x = int2Word# x
 166
 167 exactLog2 :: Integer -> Maybe Integer
 168 exactLog2 x
 169   | x <= 0 || x >= 2147483648     -- only positive values below 2^31 are handled
 170   = Nothing
 171   | otherwise
 172   = case iUnbox (fromInteger x) of { n# ->
 173     -- n AND (0 - n) isolates the lowest set bit; it equals n only when
 174     -- n has exactly one bit set, i.e. n is a power of two.
 175     if (w2i ((i2w n#) `and#` (i2w (0# -# n#))) /=# n#)
 176        then Nothing
 177        else Just (toInteger (iBox (log2 n#)))
 178     }
 179   where
 180     -- count the right-shifts needed to bring the value down to 1
 181     log2 n# | n# ==# 1# = 0#
 182             | otherwise = 1# +# log2 (w2i (i2w n# `shiftRL#` 1#))
 183 \end{code}
 184
 185 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 186
 187 \begin{code}
 188 data Cond       -- condition codes; the constructor set depends on the target architecture
 189 #if alpha_TARGET_ARCH
 190   = ALWAYS      -- For BI (same as BR)
 191   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
 192   | GE          -- For BI only
 193   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
 194   | LE          -- For CMP and BI
 195   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
 196   | NE          -- For BI only
 197   | NEVER       -- For BI (null instruction)
 198   | ULE         -- For CMP only
 199   | ULT         -- For CMP only
 200 #endif
 201 #if i386_TARGET_ARCH
 202   = ALWAYS      -- What's really used? ToDo
 203   | EQQ
 204   | GE
 205   | GEU         -- presumably unsigned GE (confirm)
 206   | GTT
 207   | GU          -- presumably unsigned GT (confirm)
 208   | LE
 209   | LEU         -- presumably unsigned LE (confirm)
 210   | LTT
 211   | LU          -- presumably unsigned LT (confirm)
 212   | NE
 213   | NEG
 214   | POS
 215   | CARRY
 216   | OFLO
 217 #endif
 218 #if sparc_TARGET_ARCH
 219   = ALWAYS      -- What's really used? ToDo
 220   | EQQ
 221   | GE
 222   | GEU         -- presumably unsigned GE (confirm)
 223   | GTT
 224   | GU          -- presumably unsigned GT (confirm)
 225   | LE
 226   | LEU         -- presumably unsigned LE (confirm)
 227   | LTT
 228   | LU          -- presumably unsigned LT (confirm)
 229   | NE
 230   | NEG
 231   | NEVER
 232   | POS
 233   | VC
 234   | VS
 235 #endif
 236 \end{code}
 237
 238 \begin{code}
 239 data Size   -- operand sizes; the constructor set depends on the target architecture
 240 #if alpha_TARGET_ARCH
 241     = B     -- byte
 242     | Bu    -- byte (unsigned); primRepToSize uses it for Word8Rep
 243 --  | W     -- word (2 bytes): UNUSED
 244 --  | Wu    -- : UNUSED
 245     | L     -- longword (4 bytes)
 246     | Q     -- quadword (8 bytes)
 247 --  | FF    -- VAX F-style floating pt: UNUSED
 248 --  | GF    -- VAX G-style floating pt: UNUSED
 249 --  | DF    -- VAX D-style floating pt: UNUSED
 250 --  | SF    -- IEEE single-precision floating pt: UNUSED
 251     | TF    -- IEEE double-precision floating pt
 252 #endif
 253 #if i386_TARGET_ARCH
 254     = B     -- byte (signed)
 255     | Bu    -- byte (unsigned)
 256     | W     -- word (signed)
 257     | Wu    -- word (unsigned)
 258     | L     -- longword (signed)
 259     | Lu    -- longword (unsigned)
 260     | F     -- IEEE single-precision floating pt
 261     | DF    -- IEEE double-precision floating pt (primRepToSize uses it for DoubleRep)
 262     | F80   -- Intel 80-bit internal FP format; only used for spilling
 263 #endif
 264 #if sparc_TARGET_ARCH
 265     = B     -- byte (signed)
 266     | Bu    -- byte (unsigned)
 267     | W     -- word (4 bytes)
 268     | F     -- IEEE single-precision floating pt
 269     | DF    -- IEEE double-precision floating pt (primRepToSize uses it for DoubleRep)
 270 #endif
 271
 272 primRepToSize :: PrimRep -> Size
 273 -- Pick the operand Size used for a PrimRep on the target architecture.
 274 primRepToSize PtrRep        = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 275 primRepToSize CodePtrRep    = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 276 primRepToSize DataPtrRep    = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 277 primRepToSize RetRep        = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 278 primRepToSize CostCentreRep = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 279 primRepToSize CharRep       = IF_ARCH_alpha(L,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 280 -- Fixed-width integer reps; the 16-bit ones are rejected on alpha and sparc.
 281 primRepToSize Int8Rep       = IF_ARCH_alpha(B,  IF_ARCH_i386(B,  IF_ARCH_sparc(B,  )))
 282 primRepToSize Int16Rep      = IF_ARCH_alpha(err,IF_ARCH_i386(W,  IF_ARCH_sparc(err,)))
 283     where err = primRepToSize_fail "Int16Rep"
 284 primRepToSize Int32Rep      = IF_ARCH_alpha(L,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 285 primRepToSize Word8Rep      = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
 286 primRepToSize Word16Rep     = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
 287     where err = primRepToSize_fail "Word16Rep"
 288 primRepToSize Word32Rep     = IF_ARCH_alpha(L,  IF_ARCH_i386(Lu, IF_ARCH_sparc(W,  )))
 289
 290 primRepToSize IntRep        = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 291 primRepToSize WordRep       = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 292 primRepToSize AddrRep       = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 293 primRepToSize FloatRep      = IF_ARCH_alpha(TF, IF_ARCH_i386(F,  IF_ARCH_sparc(F,  )))
 294 primRepToSize DoubleRep     = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
 295 primRepToSize ArrayRep      = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 296 primRepToSize ByteArrayRep  = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 297 primRepToSize PrimPtrRep    = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 298 primRepToSize WeakPtrRep    = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 299 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 300 primRepToSize BCORep        = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 301 primRepToSize StablePtrRep  = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 302 primRepToSize StableNameRep = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 303 primRepToSize ThreadIdRep   = IF_ARCH_alpha(Q,  IF_ARCH_i386(L,  IF_ARCH_sparc(W,  )))
 304 -- Word64Rep, Int64Rep and any rep not listed above are reported as errors.
 305 primRepToSize Word64Rep     = primRepToSize_fail "Word64Rep"
 306 primRepToSize Int64Rep      = primRepToSize_fail "Int64Rep"
 307 primRepToSize other         = primRepToSize_fail (showSDoc (ppr other))
 308
 309 primRepToSize_fail rep_name   -- abort with a message naming the unsupported rep
 310    = error (concat [ "ERROR: MachMisc.primRepToSize: cannot handle `", rep_name, "'.\n\t"
 311                    , "Workaround: use -fvia-C.\n\t"
 312                    , "Perhaps you should report it as a GHC bug,\n\t"
 313                    , "to glasgow-haskell-bugs@haskell.org." ])
 314
 315 \end{code}
 316
 317 %************************************************************************
 318 %*                                                                      *
 319 \subsection{Machine's assembly language}
 320 %*                                                                      *
 321 %************************************************************************
 322
 323 We have a few common ``instructions'' (nearly all the pseudo-ops) but
 324 mostly all of @Instr@ is machine-specific.
 325
 326 \begin{code}
 327 data Instr   -- pseudo-ops shared by all targets; per-architecture constructors follow under #if
 328   = COMMENT FAST_STRING         -- comment pseudo-op
 329   | SEGMENT CodeSegment         -- {data,text} segment pseudo-op
 330   | LABEL   CLabel              -- global label pseudo-op
 331   | ASCII   Bool                -- True <=> needs backslash conversion
 332             String              -- the literal string
 333   | DATA    Size                -- NOTE(review): presumably the size of each data item -- confirm
 334             [Imm]               -- NOTE(review): presumably the items to emit -- confirm
 335   | DELTA   Int                 -- specify current stack offset for
 336                                 -- benefit of subsequent passes
 337 \end{code}
 338
 339 \begin{code}
 340 #if alpha_TARGET_ARCH
 341
 342 -- data Instr continues... (alpha-only constructors)
 343
 344 -- Loads and stores.
 345
 346               | LD            Size Reg MachRegsAddr -- size, dst, src
 347               | LDA           Reg MachRegsAddr      -- dst, src
 348               | LDAH          Reg MachRegsAddr      -- dst, src
 349               | LDGP          Reg MachRegsAddr      -- dst, src
 350               | LDI           Size Reg Imm     -- size, dst, src
 351               | ST            Size Reg MachRegsAddr -- size, src, dst
 352
 353 -- Int Arithmetic.
 354
 355               | CLR           Reg                   -- dst
 356               | ABS           Size RI Reg           -- size, src, dst
 357               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 358               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 359               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 360               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 361               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 362               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 363               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 364               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 365
 366 -- Simple bit-twiddling.
 367 -- NOTE(review): operands presumably src, src, dst as in the arithmetic group -- confirm
 368               | NOT           RI Reg
 369               | AND           Reg RI Reg
 370               | ANDNOT        Reg RI Reg
 371               | OR            Reg RI Reg
 372               | ORNOT         Reg RI Reg
 373               | XOR           Reg RI Reg
 374               | XORNOT        Reg RI Reg
 375               | SLL           Reg RI Reg
 376               | SRL           Reg RI Reg
 377               | SRA           Reg RI Reg
 378
 379               | ZAP           Reg RI Reg
 380               | ZAPNOT        Reg RI Reg
 381
 382               | NOP
 383
 384 -- Comparison
 385
 386               | CMP           Cond Reg RI Reg
 387
 388 -- Float Arithmetic.
 389
 390               | FCLR          Reg
 391               | FABS          Reg Reg
 392               | FNEG          Size Reg Reg
 393               | FADD          Size Reg Reg Reg
 394               | FDIV          Size Reg Reg Reg
 395               | FMUL          Size Reg Reg Reg
 396               | FSUB          Size Reg Reg Reg
 397               | CVTxy         Size Size Reg Reg
 398               | FCMP          Size Cond Reg Reg Reg
 399               | FMOV          Reg Reg
 400
 401 -- Jumping around.
 402
 403               | BI            Cond Reg Imm
 404               | BF            Cond Reg Imm
 405               | BR            Imm
 406               | JMP           Reg MachRegsAddr Int
 407               | BSR           Imm Int
 408               | JSR           Reg MachRegsAddr Int
 409
 410 -- Alpha-specific pseudo-ops.
 411
 412               | FUNBEGIN CLabel
 413               | FUNEND CLabel
 414
 415 data RI          -- an operand that is either a register or an immediate
 416   = RIReg Reg
 417   | RIImm Imm
 418
 419 #endif {- alpha_TARGET_ARCH -}
 420 \end{code}
 421
 422 Intel, in their infinite wisdom, selected a stack model for floating
 423 point registers on x86.  That might have made sense back in 1979 --
 424 nowadays we can see it for the nonsense it really is.  A stack model
 425 fits poorly with the existing nativeGen infrastructure, which assumes
 426 flat integer and FP register sets.  Prior to this commit, nativeGen
 427 could not generate correct x86 FP code -- to do so would have meant
 428 somehow working the register-stack paradigm into the register
 429 allocator and spiller, which sounds very difficult.
 430
 431 We have decided to cheat, and go for a simple fix which requires no
 432 infrastructure modifications, at the expense of generating ropey but
 433 correct FP code.  All notions of the x86 FP stack and its insns have
 434 been removed.  Instead, we pretend (to the instruction selector and
 435 register allocator) that x86 has six floating point registers, %fake0
 436 .. %fake5, which can be used in the usual flat manner.  We further
 437 claim that x86 has floating point instructions very similar to SPARC
 438 and Alpha, that is, a simple 3-operand register-register arrangement.
 439 Code generation and register allocation proceed on this basis.
 440
 441 When we come to print out the final assembly, our convenient fiction
 442 is converted to dismal reality.  Each fake instruction is
 443 independently converted to a series of real x86 instructions.
 444 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 445 arithmetic operations, the two operands are pushed onto the top of the
 446 FP stack, the operation done, and the result copied back into the
 447 relevant register.  There are only six %fake registers because 2 are
 448 needed for the translation, and x86 has 8 in total.
 449
 450 The translation is inefficient but is simple and it works.  A cleverer
 451 translation would handle a sequence of insns, simulating the FP stack
 452 contents, would not impose a fixed mapping from %fake to %st regs, and
 453 hopefully could avoid most of the redundant reg-reg moves of the
 454 current translation.
 455
 456 We might as well make use of whatever unique FP facilities Intel have
 457 chosen to bless us with (let's not be churlish, after all).
 458 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 459
 460 LATER (10 Nov 2000): idiv gives problems with the register spiller,
 461 because the spiller is simpleminded and because idiv has fixed uses of
 462 %eax and %edx.  Rather than make the spiller cleverer, we do away with
 463 idiv, and instead have iquot and irem fake (integer) insns, which have
 464 no operand register constraints -- ie, they behave like add, sub, mul.
 465 The printer-outer transforms them to a sequence of real insns which does
 466 the Right Thing (tm).  As with the FP stuff, this gives ropey code,
 467 but we don't care, since it doesn't get used much.  We hope.
 468
 469 \begin{code}
 470 #if i386_TARGET_ARCH
 471
 472 -- data Instr continues... (i386-only constructors)
 473
 474 -- Moves.
 475
 476               | MOV           Size Operand Operand
 477               | MOVZxL        Size Operand Operand -- size is the size of operand 1
 478               | MOVSxL        Size Operand Operand -- size is the size of operand 1
 479
 480 -- Load effective address (also a very useful three-operand add instruction :-)
 481
 482               | LEA           Size Operand Operand
 483
 484 -- Int Arithmetic.
 485
 486               | ADD           Size Operand Operand
 487               | SUB           Size Operand Operand
 488               | IMUL          Size Operand Operand      -- signed int mul
 489               | MUL           Size Operand Operand      -- unsigned int mul
 490
 491 -- Quotient and remainder.  SEE comment above -- these are not
 492 -- real x86 insns; instead they are expanded when printed
 493 -- into a sequence of real insns.
 494
 495               | IQUOT         Size Operand Operand      -- signed quotient
 496               | IREM          Size Operand Operand      -- signed remainder
 497               | QUOT          Size Operand Operand      -- unsigned quotient
 498               | REM           Size Operand Operand      -- unsigned remainder
 499
 500 -- Simple bit-twiddling.
 501
 502               | AND           Size Operand Operand
 503               | OR            Size Operand Operand
 504               | XOR           Size Operand Operand
 505               | NOT           Size Operand
 506               | NEGI          Size Operand -- NEG instruction (name clash with Cond)
 507               | SHL           Size Imm Operand -- Only immediate shifts allowed
 508               | SAR           Size Imm Operand -- Only immediate shifts allowed
 509               | SHR           Size Imm Operand -- Only immediate shifts allowed
 510               | BT            Size Imm Operand
 511               | NOP
 512
 513 -- Float Arithmetic.
 514
 515 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 516 -- as single instructions right up until we spit them out.
 517
 518               -- all the 3-operand fake fp insns are src1 src2 dst
 519               -- and furthermore are constrained to be fp regs only.
 520               -- IMPORTANT: keep is_G_instr up to date with any changes here
 521               | GMOV          Reg Reg -- src(fpreg), dst(fpreg)
 522               | GLD           Size MachRegsAddr Reg -- src, dst(fpreg)
 523               | GST           Size Reg MachRegsAddr -- src(fpreg), dst
 524
 525               | GLDZ          Reg -- dst(fpreg)
 526               | GLD1          Reg -- dst(fpreg)
 527
 528               | GFTOI         Reg Reg -- src(fpreg), dst(intreg)
 529               | GDTOI         Reg Reg -- src(fpreg), dst(intreg)
 530
 531               | GITOF         Reg Reg -- src(intreg), dst(fpreg)
 532               | GITOD         Reg Reg -- src(intreg), dst(fpreg)
 533
 534               | GADD          Size Reg Reg Reg -- src1, src2, dst
 535               | GDIV          Size Reg Reg Reg -- src1, src2, dst
 536               | GSUB          Size Reg Reg Reg -- src1, src2, dst
 537               | GMUL          Size Reg Reg Reg -- src1, src2, dst
 538
 539               | GCMP          Size Reg Reg -- src1, src2
 540
 541               | GABS          Size Reg Reg -- src, dst
 542               | GNEG          Size Reg Reg -- src, dst
 543               | GSQRT         Size Reg Reg -- src, dst
 544               | GSIN          Size Reg Reg -- src, dst
 545               | GCOS          Size Reg Reg -- src, dst
 546               | GTAN          Size Reg Reg -- src, dst
 547
 548               | GFREE         -- do ffree on all x86 regs; an ugly hack
 549 -- Comparison
 550
 551               | TEST          Size Operand Operand
 552               | CMP           Size Operand Operand
 553               | SETCC         Cond Operand
 554
 555 -- Stack Operations.
 556
 557               | PUSH          Size Operand
 558               | POP           Size Operand
 559               | PUSHA
 560               | POPA
 561
 562 -- Jumping around.
 563
 564               | JMP           DestInfo Operand -- possible dests, target
 565               | JXX           Cond CLabel -- target
 566               | CALL          Imm
 567
 568 -- Other things.
 569
 570               | CLTD -- sign extend %eax into %edx:%eax
 571
 572 data Operand            -- operand forms taken by the i386 Instr constructors
 573   = OpReg  Reg          -- register
 574   | OpImm  Imm          -- immediate value
 575   | OpAddr MachRegsAddr -- memory reference
 576
 577
 578 i386_insert_ffrees :: [Instr] -> [Instr]
 579 i386_insert_ffrees insns
 580    = if   any is_G_instr insns
 581      -- some fake FP insn is present: put GFREEs before non-local transfers
 582      then concatMap ffree_before_nonlocal_transfers insns
 583      else insns
 584
 585 -- Put a GFREE in front of every call, and of every jump not known to be local.
 586 ffree_before_nonlocal_transfers insn
 587    | leaves_local_code insn = [GFREE, insn]
 588    | otherwise              = [insn]
 589    where
 590       leaves_local_code (CALL _)                                          = True
 591       leaves_local_code (JMP _ (OpImm (ImmCLbl clbl))) | isAsmTemp clbl = False  -- jumps to immediate labels are local
 592       leaves_local_code (JMP (DestInfo _) _)                              = False  -- dests given: local jump thru a case table
 593       leaves_local_code (JMP _ _)                                         = True
 594       leaves_local_code _                                                 = False
 595
 596
 597 -- True for every fake x86 FP insn.  If you ever add a new FP insn to
 598 -- the fake x86 FP insn set, you must update this too.
 599 is_G_instr :: Instr -> Bool
 600 is_G_instr instr
 601    = case instr of
 602         GFREE        -> panic "is_G_instr: GFREE (!)"
 603         GMOV _ _     -> True ; GLD _ _ _    -> True ; GST _ _ _  -> True
 604         GLDZ _       -> True ; GLD1 _       -> True
 605         GFTOI _ _    -> True ; GDTOI _ _    -> True
 606         GITOF _ _    -> True ; GITOD _ _    -> True
 607         GADD _ _ _ _ -> True ; GDIV _ _ _ _ -> True
 608         GSUB _ _ _ _ -> True ; GMUL _ _ _ _ -> True
 609         GCMP _ _ _   -> True ; GABS _ _ _   -> True ; GNEG _ _ _ -> True
 610         GSQRT _ _ _  -> True ; GSIN _ _ _   -> True
 611         GCOS _ _ _   -> True ; GTAN _ _ _   -> True
 612         _            -> False
 613
 614 #endif {- i386_TARGET_ARCH -}
 615 \end{code}
 616
 617 \begin{code}
 618 #if sparc_TARGET_ARCH
 619
 620 -- data Instr continues... (sparc-only constructors)
 621
 622 -- Loads and stores.
 623
 624               | LD            Size MachRegsAddr Reg -- size, src, dst
 625               | ST            Size Reg MachRegsAddr -- size, src, dst
 626
 627 -- Int Arithmetic.
 628
 629               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 630               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 631
 632 -- Simple bit-twiddling.
 633
 634               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 635               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 636               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 637               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 638               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 639               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 640               | SLL           Reg RI Reg -- src1, src2, dst
 641               | SRL           Reg RI Reg -- src1, src2, dst
 642               | SRA           Reg RI Reg -- src1, src2, dst
 643               | SETHI         Imm Reg -- src, dst
 644               | NOP           -- Really SETHI 0, %g0, but worth an alias
 645
 646 -- Float Arithmetic.
 647
 648 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
 649 -- right up until we spit them out.
 650
 651               | FABS          Size Reg Reg -- src dst
 652               | FADD          Size Reg Reg Reg -- src1, src2, dst
 653               | FCMP          Bool Size Reg Reg -- exception?, src1, src2 (no dst field)
 654               | FDIV          Size Reg Reg Reg -- src1, src2, dst
 655               | FMOV          Size Reg Reg -- src, dst
 656               | FMUL          Size Reg Reg Reg -- src1, src2, dst
 657               | FNEG          Size Reg Reg -- src, dst
 658               | FSQRT         Size Reg Reg -- src, dst
 659               | FSUB          Size Reg Reg Reg -- src1, src2, dst
 660               | FxTOy         Size Size Reg Reg -- src, dst
 661
 662 -- Jumping around.
 663
 664               | BI            Cond Bool Imm -- cond, annul?, target
 665               | BF            Cond Bool Imm -- cond, annul?, target
 666
 667               | JMP           DestInfo MachRegsAddr      -- target
 668               | CALL          Imm Int Bool -- target, args, terminal
 669
 670 data RI = RIReg Reg   -- an operand that is either a register ...
 671         | RIImm Imm   -- ... or an immediate
 672
 673 riZero :: RI -> Bool   -- True for an immediate 0 or for real register number 0
 674 riZero ri = case ri of
 675    RIImm (ImmInt 0)     -> True
 676    RIImm (ImmInteger 0) -> True
 677    RIReg (RealReg 0)    -> True
 678    _                    -> False
 679
 680 -- Calculate the effective address which would be used by the
 681 -- corresponding fpRel sequence.  fpRel itself lives in MachRegs.lhs;
 682 -- fpRelEA cannot live there too, because of module dependencies.
 683 fpRelEA :: Int -> Reg -> Instr
 684 fpRelEA n dst = ADD False False fp byte_offset dst
 685    where byte_offset = RIImm (ImmInt (n * BYTES_PER_WORD))
 686
 687 -- Code to shift the stack pointer by n words.
 688 moveSp :: Int -> Instr
 689 moveSp n = ADD False False sp (RIImm delta) sp
 690    where delta = ImmInt (n * BYTES_PER_WORD)   -- n words, scaled by BYTES_PER_WORD
 691
 692 fPair :: Reg -> Reg   -- the second-half-of-a-double register, given the first half
 693 fPair reg = case reg of
 694    RealReg n | n >= 32 && n `mod` 2 == 0 -> RealReg (n+1)
 695    _                                     -> pprPanic "fPair(sparc NCG)" (ppr reg)
 696 #endif {- sparc_TARGET_ARCH -}
 697 \end{code}