2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment, DestInfo(..) )
54 import Panic ( panic )
55 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
56 import Outputable ( pprPanic, ppr, showSDoc )
57 import IOExts ( trace )
58 import Config ( cLeadingUnderscore )
-- | Whether assembler labels get a leading underscore.  This is True
-- exactly when the imported configuration string cLeadingUnderscore
-- is equal to "YES"; any other value yields False.
63 underscorePrefix :: Bool -- leading underscore on assembler labels?
64 underscorePrefix = (cLeadingUnderscore == "YES")
66 ---------------------------
67 fmtAsmLbl :: String -> String -- for formatting labels
71 {- The alpha assembler likes temporary labels to look like $L123
72 instead of L123. (Don't toss the L, because then Lf28
81 % ----------------------------------------------------------------
83 We (allegedly) put the first six C-call arguments in registers;
84 where do we start putting the rest of them?
86 eXTRA_STK_ARGS_HERE :: Int
88 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
91 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
93 Size of a @PrimRep@, in bytes.
96 sizeOf :: PrimRep -> Int{-in bytes-}
97 sizeOf pr = case primRepToSize pr of
98 IF_ARCH_alpha({B->1; Bu->1; {-W->2; Wu->2;-} L->4; {-SF->4;-} Q->8; TF->8},)
99 IF_ARCH_i386 ({B->1; Bu->1; W->2; Wu->2; L->4; Lu->4; F->4; DF->8; F80->10},)
100 IF_ARCH_sparc({B->1; Bu->1; W->4; F->4; DF->8},)
103 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
105 Now the volatile saves and restores. We add the basic guys to the
106 list of ``user'' registers provided. Note that there are more basic
107 registers on the restore list, because some are reloaded from
110 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
113 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
115 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
116 restore_cands = save_cands
119 = map save ((filter callerSaves) (save_cands ++ vols))
121 save x = StAssign (magicIdPrimRep x) loc reg
123 reg = StReg (StixMagicId x)
124 loc = case stgReg x of
126 Always _ -> panic "volatileSaves"
128 volatileRestores vols
129 = map restore ((filter callerSaves) (restore_cands ++ vols))
131 restore x = StAssign (magicIdPrimRep x) reg loc
133 reg = StReg (StixMagicId x)
134 loc = case stgReg x of
136 Always _ -> panic "volatileRestores"
139 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141 Obviously slightly weedy
142 (Note that the floating point values aren't terribly important.)
-- Extreme literal values assumed for the target machine.
-- The two doubles are -1.7976931348623157e+308 and +1.7976931348623157e+308
-- wrapped in MachDouble, so targetMinDouble is the most negative value,
-- not the smallest positive one.
145 targetMinDouble = MachDouble (-1.7976931348623157e+308)
146 targetMaxDouble = MachDouble (1.7976931348623157e+308)
-- The two ints are -2^31 and 2^31 - 1, built with mkMachInt.
-- NOTE(review): these bounds are fixed at 32 bits, yet primRepToSize maps
-- IntRep to Q (quadword) on alpha -- confirm that this is intended.
147 targetMinInt = mkMachInt (-2147483648)
148 targetMaxInt = mkMachInt 2147483647
151 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
153 This algorithm for determining the $\log_2$ of exact powers of 2 comes
154 from GCC. It requires bit manipulation primitives, and we use GHC
161 exactLog2 :: Integer -> Maybe Integer
163 = if (x <= 0 || x >= 2147483648) then
166 case iUnbox (fromInteger x) of { x# ->
167 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
170 Just (toInteger (iBox (pow2 x#)))
173 shiftr x y = shiftRL# x y
175 pow2 x# | x# ==# 1# = 0#
176 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
179 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
183 #if alpha_TARGET_ARCH
184 = ALWAYS -- For BI (same as BR)
185 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
187 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
188 | LE -- For CMP and BI
189 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
191 | NEVER -- For BI (null instruction)
192 | ULE -- For CMP only
193 | ULT -- For CMP only
196 = ALWAYS -- What's really used? ToDo
210 #if sparc_TARGET_ARCH
211 = ALWAYS -- What's really used? ToDo
232 #if alpha_TARGET_ARCH
235 -- | W -- word (2 bytes): UNUSED
237 | L -- longword (4 bytes)
238 | Q -- quadword (8 bytes)
239 -- | FF -- VAX F-style floating pt: UNUSED
240 -- | GF -- VAX G-style floating pt: UNUSED
241 -- | DF -- VAX D-style floating pt: UNUSED
242 -- | SF -- IEEE single-precision floating pt: UNUSED
243 | TF -- IEEE double-precision floating pt
247 | Bu -- byte (unsigned)
249 | Wu -- word (unsigned)
250 | L -- longword (signed)
251 | Lu -- longword (unsigned)
252 | F -- IEEE single-precision floating pt
253 | DF -- IEEE double-precision floating pt
254 | F80 -- Intel 80-bit internal FP format; only used for spilling
256 #if sparc_TARGET_ARCH
258 | Bu -- byte (unsigned)
259 | W -- word (4 bytes)
260 | F -- IEEE single-precision floating pt
261 | DF -- IEEE double-precision floating pt
-- | Map a PrimRep to the Size tag used by the native code generator.
-- Each equation lists one candidate Size per architecture, in the order
-- alpha, i386, sparc, wrapped in nested IF_ARCH_* macros; presumably the
-- macros keep only the candidate for the architecture being compiled
-- (their definitions are not visible here -- TODO confirm in NCG.h).
-- Representations that cannot be handled are routed to
-- primRepToSize_fail, which raises an error.
264 primRepToSize :: PrimRep -> Size
266 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
267 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
268 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
269 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
270 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
271 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
273 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
-- The 16-bit representations only have a Size on i386; the alpha and
-- sparc candidates are the failure value err.
274 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(err,)))
275 where err = primRepToSize_fail "Int16Rep"
276 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
277 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
278 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
279 where err = primRepToSize_fail "Word16Rep"
280 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
282 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
283 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
284 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
-- On alpha both FloatRep and DoubleRep use TF; i386 and sparc
-- distinguish F from DF.
285 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
286 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
287 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
288 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
289 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
290 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
291 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
292 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
293 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
294 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
-- The 64-bit representations fail on every architecture, as does any
-- representation not listed above (reported by its printed name).
296 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep"
297 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
298 primRepToSize other = primRepToSize_fail (showSDoc (ppr other))
-- | Failure path for primRepToSize.  Calls error with a message that
-- names the representation that could not be handled (the str argument),
-- suggests -fvia-C as a workaround, and asks for a bug report.
-- It never returns a value, so it can stand in for a result of any type.
300 primRepToSize_fail str
301 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
302 ++ "Workaround: use -fvia-C.\n\t"
303 ++ "Perhaps you should report it as a GHC bug,\n\t"
304 ++ "to glasgow-haskell-bugs@haskell.org.")
308 %************************************************************************
310 \subsection{Machine's assembly language}
312 %************************************************************************
314 We have a few common ``instructions'' (nearly all the pseudo-ops) but
315 mostly all of @Instr@ is machine-specific.
319 = COMMENT FAST_STRING -- comment pseudo-op
320 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
321 | LABEL CLabel -- global label pseudo-op
322 | ASCII Bool -- True <=> needs backslash conversion
323 String -- the literal string
326 | DELTA Int -- specify current stack offset for
327 -- benefit of subsequent passes
331 #if alpha_TARGET_ARCH
333 -- data Instr continues...
337 | LD Size Reg MachRegsAddr -- size, dst, src
338 | LDA Reg MachRegsAddr -- dst, src
339 | LDAH Reg MachRegsAddr -- dst, src
340 | LDGP Reg MachRegsAddr -- dst, src
341 | LDI Size Reg Imm -- size, dst, src
342 | ST Size Reg MachRegsAddr -- size, src, dst
347 | ABS Size RI Reg -- size, src, dst
348 | NEG Size Bool RI Reg -- size, overflow, src, dst
349 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
350 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
351 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
352 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
353 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
354 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
355 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
357 -- Simple bit-twiddling.
377 | CMP Cond Reg RI Reg
384 | FADD Size Reg Reg Reg
385 | FDIV Size Reg Reg Reg
386 | FMUL Size Reg Reg Reg
387 | FSUB Size Reg Reg Reg
388 | CVTxy Size Size Reg Reg
389 | FCMP Size Cond Reg Reg Reg
397 | JMP Reg MachRegsAddr Int
399 | JSR Reg MachRegsAddr Int
401 -- Alpha-specific pseudo-ops.
410 #endif {- alpha_TARGET_ARCH -}
413 Intel, in their infinite wisdom, selected a stack model for floating
414 point registers on x86. That might have made sense back in 1979 --
415 nowadays we can see it for the nonsense it really is. A stack model
416 fits poorly with the existing nativeGen infrastructure, which assumes
417 flat integer and FP register sets. Prior to this commit, nativeGen
418 could not generate correct x86 FP code -- to do so would have meant
419 somehow working the register-stack paradigm into the register
420 allocator and spiller, which sounds very difficult.
422 We have decided to cheat, and go for a simple fix which requires no
423 infrastructure modifications, at the expense of generating ropey but
424 correct FP code. All notions of the x86 FP stack and its insns have
425 been removed. Instead, we pretend (to the instruction selector and
426 register allocator) that x86 has six floating point registers, %fake0
427 .. %fake5, which can be used in the usual flat manner. We further
428 claim that x86 has floating point instructions very similar to SPARC
429 and Alpha, that is, a simple 3-operand register-register arrangement.
430 Code generation and register allocation proceed on this basis.
432 When we come to print out the final assembly, our convenient fiction
433 is converted to dismal reality. Each fake instruction is
434 independently converted to a series of real x86 instructions.
435 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
436 arithmetic operations, the two operands are pushed onto the top of the
437 FP stack, the operation done, and the result copied back into the
438 relevant register. There are only six %fake registers because 2 are
439 needed for the translation, and x86 has 8 in total.
441 The translation is inefficient but is simple and it works. A cleverer
442 translation would handle a sequence of insns, simulating the FP stack
443 contents, would not impose a fixed mapping from %fake to %st regs, and
444 hopefully could avoid most of the redundant reg-reg moves of the
447 We might as well make use of whatever unique FP facilities Intel have
448 chosen to bless us with (let's not be churlish, after all).
449 Hence GLDZ and GLD1. Bwahahahahahahaha!
451 LATER (10 Nov 2000): idiv gives problems with the register spiller,
452 because the spiller is simpleminded and because idiv has fixed uses of
453 %eax and %edx. Rather than make the spiller cleverer, we do away with
454 idiv, and instead have iquot and irem fake (integer) insns, which have
455 no operand register constraints -- ie, they behave like add, sub, mul.
456 The printer-outer transforms them to a sequence of real insns which does
457 the Right Thing (tm). As with the FP stuff, this gives ropey code,
458 but we don't care, since it doesn't get used much. We hope.
463 -- data Instr continues...
467 | MOV Size Operand Operand
468 | MOVZxL Size Operand Operand -- size is the size of operand 1
469 | MOVSxL Size Operand Operand -- size is the size of operand 1
471 -- Load effective address (also a very useful three-operand add instruction :-)
473 | LEA Size Operand Operand
477 | ADD Size Operand Operand
478 | SUB Size Operand Operand
479 | IMUL Size Operand Operand
481 -- Quotient and remainder. SEE comment above -- these are not
482 -- real x86 insns; instead they are expanded when printed
483 -- into a sequence of real insns.
485 | IQUOT Size Operand Operand
486 | IREM Size Operand Operand
488 -- Simple bit-twiddling.
490 | AND Size Operand Operand
491 | OR Size Operand Operand
492 | XOR Size Operand Operand
494 | NEGI Size Operand -- NEG instruction (name clash with Cond)
495 | SHL Size Imm Operand -- Only immediate shifts allowed
496 | SAR Size Imm Operand -- Only immediate shifts allowed
497 | SHR Size Imm Operand -- Only immediate shifts allowed
498 | BT Size Imm Operand
503 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
504 -- as single instructions right up until we spit them out.
506 -- all the 3-operand fake fp insns are src1 src2 dst
507 -- and furthermore are constrained to be fp regs only.
508 -- IMPORTANT: keep is_G_instr up to date with any changes here
509 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
510 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
511 | GST Size Reg MachRegsAddr -- src(fpreg), dst
513 | GLDZ Reg -- dst(fpreg)
514 | GLD1 Reg -- dst(fpreg)
516 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
517 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
519 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
520 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
522 | GITOF Reg Reg -- src(intreg), dst(fpreg)
523 | GITOD Reg Reg -- src(intreg), dst(fpreg)
525 | GADD Size Reg Reg Reg -- src1, src2, dst
526 | GDIV Size Reg Reg Reg -- src1, src2, dst
527 | GSUB Size Reg Reg Reg -- src1, src2, dst
528 | GMUL Size Reg Reg Reg -- src1, src2, dst
530 | GCMP Size Reg Reg -- src1, src2
532 | GABS Size Reg Reg -- src, dst
533 | GNEG Size Reg Reg -- src, dst
534 | GSQRT Size Reg Reg -- src, dst
535 | GSIN Size Reg Reg -- src, dst
536 | GCOS Size Reg Reg -- src, dst
537 | GTAN Size Reg Reg -- src, dst
539 | GFREE -- do ffree on all x86 regs; an ugly hack
542 | TEST Size Operand Operand
543 | CMP Size Operand Operand
555 | JMP DestInfo Operand -- possible dests, target
556 | JXX Cond CLabel -- target
561 | CLTD -- sign extend %eax into %edx:%eax
564 = OpReg Reg -- register
565 | OpImm Imm -- immediate value
566 | OpAddr MachRegsAddr -- memory reference
569 i386_insert_ffrees :: [Instr] -> [Instr]
570 i386_insert_ffrees insns
571 | any is_G_instr insns
572 = concatMap ffree_before_nonlocal_transfers insns
576 ffree_before_nonlocal_transfers insn
578 CALL _ -> [GFREE, insn]
579 -- Jumps to immediate labels are local
580 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
581 -- If a jump mentions dests, it is a local jump thru
583 JMP (DestInfo _) _ -> [insn]
584 JMP _ _ -> [GFREE, insn]
588 -- if you ever add a new FP insn to the fake x86 FP insn set,
589 -- you must update this too
590 is_G_instr :: Instr -> Bool
593 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
594 GLDZ _ -> True; GLD1 _ -> True;
595 GFTOD _ _ -> True; GFTOI _ _ -> True;
596 GDTOF _ _ -> True; GDTOI _ _ -> True;
597 GITOF _ _ -> True; GITOD _ _ -> True;
598 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
599 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
600 GCMP _ _ _ -> True; GABS _ _ _ -> True
601 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
602 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
603 GFREE -> panic "is_G_instr: GFREE (!)"
606 #endif {- i386_TARGET_ARCH -}
610 #if sparc_TARGET_ARCH
612 -- data Instr continues...
616 | LD Size MachRegsAddr Reg -- size, src, dst
617 | ST Size Reg MachRegsAddr -- size, src, dst
621 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
622 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
624 -- Simple bit-twiddling.
626 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
627 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
628 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
629 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
630 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
631 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
632 | SLL Reg RI Reg -- src1, src2, dst
633 | SRL Reg RI Reg -- src1, src2, dst
634 | SRA Reg RI Reg -- src1, src2, dst
635 | SETHI Imm Reg -- src, dst
636 | NOP -- Really SETHI 0, %g0, but worth an alias
640 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
641 -- right up until we spit them out.
643 | FABS Size Reg Reg -- src dst
644 | FADD Size Reg Reg Reg -- src1, src2, dst
645 | FCMP Bool Size Reg Reg -- exception?, src1, src2
646 | FDIV Size Reg Reg Reg -- src1, src2, dst
647 | FMOV Size Reg Reg -- src, dst
648 | FMUL Size Reg Reg Reg -- src1, src2, dst
649 | FNEG Size Reg Reg -- src, dst
650 | FSQRT Size Reg Reg -- src, dst
651 | FSUB Size Reg Reg Reg -- src1, src2, dst
652 | FxTOy Size Size Reg Reg -- src, dst
656 | BI Cond Bool Imm -- cond, annul?, target
657 | BF Cond Bool Imm -- cond, annul?, target
659 | JMP DestInfo MachRegsAddr -- target
660 | CALL Imm Int Bool -- target, args, terminal
667 riZero (RIImm (ImmInt 0)) = True
668 riZero (RIImm (ImmInteger 0)) = True
669 riZero (RIReg (RealReg 0)) = True
672 -- Calculate the effective address which would be used by the
673 -- corresponding fpRel sequence. fpRel itself is in MachRegs.lhs,
674 -- alas -- fpRelEA can't live there because of module dependencies.
675 fpRelEA :: Int -> Reg -> Instr
677 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
679 -- Code to shift the stack pointer by n words.
680 moveSp :: Int -> Instr
682 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
684 -- Produce the second-half-of-a-double register given the first half.
-- Only an even-numbered RealReg numbered 32 or higher is accepted; its
-- partner is the next register up, n+1.
686 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
-- Every other argument is rejected via pprPanic, passing the printed register.
687 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
688 #endif {- sparc_TARGET_ARCH -}