2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( callerSaves,
46 get_MagicId_addr, get_MagicId_reg_or_addr,
47 Imm(..), Reg(..), MachRegsAddr(..)
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixStmt(..), StixExpr(..), StixReg(..),
54 CodeSegment, DestInfo(..) )
55 import Panic ( panic )
56 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
57 import Outputable ( pprPanic, ppr, showSDoc )
58 import IOExts ( trace )
59 import Config ( cLeadingUnderscore )
62 import Maybe ( catMaybes )
66 underscorePrefix :: Bool -- leading underscore on assembler labels?
67 underscorePrefix = (cLeadingUnderscore == "YES")
69 ---------------------------
70 fmtAsmLbl :: String -> String -- for formatting labels
74 {- The alpha assembler likes temporary labels to look like $L123
75 instead of L123. (Don't toss the L, because then Lf28
84 % ----------------------------------------------------------------
86 We (allegedly) put the first six C-call arguments in registers;
87 where do we start putting the rest of them?
89 eXTRA_STK_ARGS_HERE :: Int
91 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96 Size of a @PrimRep@, in bytes.
99 sizeOf :: PrimRep -> Int{-in bytes-}
100 sizeOf pr = case primRepToSize pr of
101 IF_ARCH_alpha({B->1; Bu->1; {-W->2; Wu->2;-} L->4; {-SF->4;-} Q->8; TF->8},)
102 IF_ARCH_i386 ({B->1; Bu->1; W->2; Wu->2; L->4; Lu->4; F->4; DF->8; F80->10},)
103 IF_ARCH_sparc({B->1; Bu->1; W->4; F->4; DF->8},)
106 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
108 Now the volatile saves and restores. We add the basic guys to the
109 list of ``user'' registers provided. Note that there are more basic
110 registers on the restore list, because some are reloaded from
113 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
116 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt]
118 volatileSaves = volatileSavesOrRestores True
119 volatileRestores = volatileSavesOrRestores False
121 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
122 restore_cands = save_cands
124 volatileSavesOrRestores do_saves vols
125 = catMaybes (map mkCode vols)
128 | not (callerSaves mid)
130 | otherwise -- must be callee-saves ...
131 = case get_MagicId_reg_or_addr mid of
132 -- If stored in BaseReg, we ain't interested
136 -- OK, it's callee-saves, and in a real reg (rrno).
137 -- We have to cook up some transfer code.
138 {- Note that the use of (StixMagicId mid) here is a bit subtle.
139 Here, we only create those for MagicIds which are stored in
140 a real reg on this arch -- the preceding case on the result
141 of get_MagicId_reg_or_addr guarantees this. Later, when
142 selecting insns, that means these assignments are sure to turn
143 into real reg-to-mem or mem-to-reg moves, rather than being
144 pointless moves from some address in the reg-table
147 -> Just (StAssignMem rep addr
148 (StReg (StixMagicId mid)))
150 -> Just (StAssignReg rep (StixMagicId mid)
153 rep = magicIdPrimRep mid
154 addr = get_MagicId_addr mid
157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 Obviously slightly weedy
160 (Note that the floating point values aren't terribly important.)
163 targetMinDouble = MachDouble (-1.7976931348623157e+308)
164 targetMaxDouble = MachDouble (1.7976931348623157e+308)
165 targetMinInt = mkMachInt (-2147483648)
166 targetMaxInt = mkMachInt 2147483647
169 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
171 This algorithm for determining the $\log_2$ of exact powers of 2 comes
172 from GCC. It requires bit manipulation primitives, and we use GHC
179 exactLog2 :: Integer -> Maybe Integer
181 = if (x <= 0 || x >= 2147483648) then
184 case iUnbox (fromInteger x) of { x# ->
185 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
188 Just (toInteger (iBox (pow2 x#)))
191 shiftr x y = shiftRL# x y
193 pow2 x# | x# ==# 1# = 0#
194 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
197 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
201 #if alpha_TARGET_ARCH
202 = ALWAYS -- For BI (same as BR)
203 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
205 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
206 | LE -- For CMP and BI
207 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
209 | NEVER -- For BI (null instruction)
210 | ULE -- For CMP only
211 | ULT -- For CMP only
214 = ALWAYS -- What's really used? ToDo
230 #if sparc_TARGET_ARCH
231 = ALWAYS -- What's really used? ToDo
252 #if alpha_TARGET_ARCH
255 -- | W -- word (2 bytes): UNUSED
257 | L -- longword (4 bytes)
258 | Q -- quadword (8 bytes)
259 -- | FF -- VAX F-style floating pt: UNUSED
260 -- | GF -- VAX G-style floating pt: UNUSED
261 -- | DF -- VAX D-style floating pt: UNUSED
262 -- | SF -- IEEE single-precision floating pt: UNUSED
263 | TF -- IEEE double-precision floating pt
267 | Bu -- byte (unsigned)
269 | Wu -- word (unsigned)
270 | L -- longword (signed)
271 | Lu -- longword (unsigned)
272 | F -- IEEE single-precision floating pt
273 | DF -- IEEE single-precision floating pt
274 | F80 -- Intel 80-bit internal FP format; only used for spilling
276 #if sparc_TARGET_ARCH
278 | Bu -- byte (unsigned)
279 | W -- word (4 bytes)
280 | F -- IEEE single-precision floating pt
281 | DF -- IEEE single-precision floating pt
284 primRepToSize :: PrimRep -> Size
286 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
287 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
288 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
289 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
290 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
291 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
293 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
294 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(err,)))
295 where err = primRepToSize_fail "Int16Rep"
296 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
298 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
299 where err = primRepToSize_fail "Word16Rep"
300 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
302 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
303 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
304 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
305 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
306 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
307 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
308 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
309 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
310 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
311 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
312 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
313 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
314 primRepToSize StableNameRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
315 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
317 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep"
318 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
319 primRepToSize other = primRepToSize_fail (showSDoc (ppr other))
321 primRepToSize_fail str
322 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
323 ++ "Workaround: use -fvia-C.\n\t"
324 ++ "Perhaps you should report it as a GHC bug,\n\t"
325 ++ "to glasgow-haskell-bugs@haskell.org.")
329 %************************************************************************
331 \subsection{Machine's assembly language}
333 %************************************************************************
335 We have a few common ``instructions'' (nearly all the pseudo-ops) but
336 mostly all of @Instr@ is machine-specific.
340 = COMMENT FAST_STRING -- comment pseudo-op
341 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
342 | LABEL CLabel -- global label pseudo-op
343 | ASCII Bool -- True <=> needs backslash conversion
344 String -- the literal string
347 | DELTA Int -- specify current stack offset for
348 -- benefit of subsequent passes
352 #if alpha_TARGET_ARCH
354 -- data Instr continues...
358 | LD Size Reg MachRegsAddr -- size, dst, src
359 | LDA Reg MachRegsAddr -- dst, src
360 | LDAH Reg MachRegsAddr -- dst, src
361 | LDGP Reg MachRegsAddr -- dst, src
362 | LDI Size Reg Imm -- size, dst, src
363 | ST Size Reg MachRegsAddr -- size, src, dst
368 | ABS Size RI Reg -- size, src, dst
369 | NEG Size Bool RI Reg -- size, overflow, src, dst
370 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
371 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
372 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
373 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
374 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
375 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
376 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
378 -- Simple bit-twiddling.
398 | CMP Cond Reg RI Reg
405 | FADD Size Reg Reg Reg
406 | FDIV Size Reg Reg Reg
407 | FMUL Size Reg Reg Reg
408 | FSUB Size Reg Reg Reg
409 | CVTxy Size Size Reg Reg
410 | FCMP Size Cond Reg Reg Reg
418 | JMP Reg MachRegsAddr Int
420 | JSR Reg MachRegsAddr Int
422 -- Alpha-specific pseudo-ops.
431 #endif {- alpha_TARGET_ARCH -}
434 Intel, in their infinite wisdom, selected a stack model for floating
435 point registers on x86. That might have made sense back in 1979 --
436 nowadays we can see it for the nonsense it really is. A stack model
437 fits poorly with the existing nativeGen infrastructure, which assumes
438 flat integer and FP register sets. Prior to this commit, nativeGen
439 could not generate correct x86 FP code -- to do so would have meant
440 somehow working the register-stack paradigm into the register
441 allocator and spiller, which sounds very difficult.
443 We have decided to cheat, and go for a simple fix which requires no
444 infrastructure modifications, at the expense of generating ropey but
445 correct FP code. All notions of the x86 FP stack and its insns have
446 been removed. Instead, we pretend (to the instruction selector and
447 register allocator) that x86 has six floating point registers, %fake0
448 .. %fake5, which can be used in the usual flat manner. We further
449 claim that x86 has floating point instructions very similar to SPARC
450 and Alpha, that is, a simple 3-operand register-register arrangement.
451 Code generation and register allocation proceed on this basis.
453 When we come to print out the final assembly, our convenient fiction
454 is converted to dismal reality. Each fake instruction is
455 independently converted to a series of real x86 instructions.
456 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
457 arithmetic operations, the two operands are pushed onto the top of the
458 FP stack, the operation done, and the result copied back into the
459 relevant register. There are only six %fake registers because 2 are
460 needed for the translation, and x86 has 8 in total.
462 The translation is inefficient but is simple and it works. A cleverer
463 translation would handle a sequence of insns, simulating the FP stack
464 contents, would not impose a fixed mapping from %fake to %st regs, and
465 hopefully could avoid most of the redundant reg-reg moves of the
468 We might as well make use of whatever unique FP facilities Intel have
469 chosen to bless us with (let's not be churlish, after all).
470 Hence GLDZ and GLD1. Bwahahahahahahaha!
472 LATER (10 Nov 2000): idiv gives problems with the register spiller,
473 because the spiller is simpleminded and because idiv has fixed uses of
474 %eax and %edx. Rather than make the spiller cleverer, we do away with
475 idiv, and instead have iquot and irem fake (integer) insns, which have
476 no operand register constraints -- ie, they behave like add, sub, mul.
477 The printer-outer transforms them to a sequence of real insns which does
478 the Right Thing (tm). As with the FP stuff, this gives ropey code,
479 but we don't care, since it doesn't get used much. We hope.
484 -- data Instr continues...
488 | MOV Size Operand Operand
489 | MOVZxL Size Operand Operand -- size is the size of operand 1
490 | MOVSxL Size Operand Operand -- size is the size of operand 1
492 -- Load effective address (also a very useful three-operand add instruction :-)
494 | LEA Size Operand Operand
498 | ADD Size Operand Operand
499 | SUB Size Operand Operand
500 | IMUL Size Operand Operand -- signed int mul
501 | MUL Size Operand Operand -- unsigned int mul
503 -- Quotient and remainder. SEE comment above -- these are not
504 -- real x86 insns; instead they are expanded when printed
505 -- into a sequence of real insns.
507 | IQUOT Size Operand Operand -- signed quotient
508 | IREM Size Operand Operand -- signed remainder
509 | QUOT Size Operand Operand -- unsigned quotient
510 | REM Size Operand Operand -- unsigned remainder
512 -- Simple bit-twiddling.
514 | AND Size Operand Operand
515 | OR Size Operand Operand
516 | XOR Size Operand Operand
518 | NEGI Size Operand -- NEG instruction (name clash with Cond)
519 | SHL Size Imm Operand -- Only immediate shifts allowed
520 | SAR Size Imm Operand -- Only immediate shifts allowed
521 | SHR Size Imm Operand -- Only immediate shifts allowed
522 | BT Size Imm Operand
527 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
528 -- as single instructions right up until we spit them out.
530 -- all the 3-operand fake fp insns are src1 src2 dst
531 -- and furthermore are constrained to be fp regs only.
532 -- IMPORTANT: keep is_G_insn up to date with any changes here
533 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
534 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
535 | GST Size Reg MachRegsAddr -- src(fpreg), dst
537 | GLDZ Reg -- dst(fpreg)
538 | GLD1 Reg -- dst(fpreg)
540 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
541 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
543 | GITOF Reg Reg -- src(intreg), dst(fpreg)
544 | GITOD Reg Reg -- src(intreg), dst(fpreg)
546 | GADD Size Reg Reg Reg -- src1, src2, dst
547 | GDIV Size Reg Reg Reg -- src1, src2, dst
548 | GSUB Size Reg Reg Reg -- src1, src2, dst
549 | GMUL Size Reg Reg Reg -- src1, src2, dst
551 | GCMP Size Reg Reg -- src1, src2
553 | GABS Size Reg Reg -- src, dst
554 | GNEG Size Reg Reg -- src, dst
555 | GSQRT Size Reg Reg -- src, dst
556 | GSIN Size Reg Reg -- src, dst
557 | GCOS Size Reg Reg -- src, dst
558 | GTAN Size Reg Reg -- src, dst
560 | GFREE -- do ffree on all x86 regs; an ugly hack
563 | TEST Size Operand Operand
564 | CMP Size Operand Operand
576 | JMP DestInfo Operand -- possible dests, target
577 | JXX Cond CLabel -- target
582 | CLTD -- sign extend %eax into %edx:%eax
585 = OpReg Reg -- register
586 | OpImm Imm -- immediate value
587 | OpAddr MachRegsAddr -- memory reference
590 i386_insert_ffrees :: [Instr] -> [Instr]
591 i386_insert_ffrees insns
592 | any is_G_instr insns
593 = concatMap ffree_before_nonlocal_transfers insns
597 ffree_before_nonlocal_transfers insn
599 CALL _ -> [GFREE, insn]
600 -- Jumps to immediate labels are local
601 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
602 -- If a jump mentions dests, it is a local jump thru
604 JMP (DestInfo _) _ -> [insn]
605 JMP _ _ -> [GFREE, insn]
609 -- if you ever add a new FP insn to the fake x86 FP insn set,
610 -- you must update this too
611 is_G_instr :: Instr -> Bool
614 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
615 GLDZ _ -> True; GLD1 _ -> True;
616 GFTOI _ _ -> True; GDTOI _ _ -> True;
617 GITOF _ _ -> True; GITOD _ _ -> True;
618 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
619 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
620 GCMP _ _ _ -> True; GABS _ _ _ -> True
621 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
622 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
623 GFREE -> panic "is_G_instr: GFREE (!)"
626 #endif {- i386_TARGET_ARCH -}
630 #if sparc_TARGET_ARCH
632 -- data Instr continues...
636 | LD Size MachRegsAddr Reg -- size, src, dst
637 | ST Size Reg MachRegsAddr -- size, src, dst
641 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
642 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
644 -- Simple bit-twiddling.
646 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
647 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
648 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
649 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
650 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
651 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
652 | SLL Reg RI Reg -- src1, src2, dst
653 | SRL Reg RI Reg -- src1, src2, dst
654 | SRA Reg RI Reg -- src1, src2, dst
655 | SETHI Imm Reg -- src, dst
656 | NOP -- Really SETHI 0, %g0, but worth an alias
660 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
661 -- right up until we spit them out.
663 | FABS Size Reg Reg -- src dst
664 | FADD Size Reg Reg Reg -- src1, src2, dst
665 | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
666 | FDIV Size Reg Reg Reg -- src1, src2, dst
667 | FMOV Size Reg Reg -- src, dst
668 | FMUL Size Reg Reg Reg -- src1, src2, dst
669 | FNEG Size Reg Reg -- src, dst
670 | FSQRT Size Reg Reg -- src, dst
671 | FSUB Size Reg Reg Reg -- src1, src2, dst
672 | FxTOy Size Size Reg Reg -- src, dst
676 | BI Cond Bool Imm -- cond, annul?, target
677 | BF Cond Bool Imm -- cond, annul?, target
679 | JMP DestInfo MachRegsAddr -- target
680 | CALL Imm Int Bool -- target, args, terminal
687 riZero (RIImm (ImmInt 0)) = True
688 riZero (RIImm (ImmInteger 0)) = True
689 riZero (RIReg (RealReg 0)) = True
692 -- Calculate the effective address which would be used by the
693 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
694 -- alas -- can't have fpRelEA here because of module dependencies.
695 fpRelEA :: Int -> Reg -> Instr
697 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
699 -- Code to shift the stack pointer by n words.
700 moveSp :: Int -> Instr
702 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
704 -- Produce the second-half-of-a-double register given the first half.
706 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
707 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
708 #endif {- sparc_TARGET_ARCH -}