2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( callerSaves,
46 get_MagicId_addr, get_MagicId_reg_or_addr,
47 Imm(..), Reg(..), MachRegsAddr(..)
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixStmt(..), StixExpr(..), StixReg(..),
54 CodeSegment, DestInfo(..) )
55 import Panic ( panic )
56 import Outputable ( pprPanic, ppr, showSDoc )
57 import Config ( cLeadingUnderscore )
62 import TRACE ( trace )
64 import Maybe ( catMaybes )
68 underscorePrefix :: Bool -- leading underscore on assembler labels?
69 underscorePrefix = (cLeadingUnderscore == "YES")
71 ---------------------------
72 fmtAsmLbl :: String -> String -- for formatting labels
76 {- The alpha assembler likes temporary labels to look like $L123
77 instead of L123. (Don't toss the L, because then Lf28
86 % ----------------------------------------------------------------
88 We (allegedly) put the first six C-call arguments in registers;
89 where do we start putting the rest of them?
91 eXTRA_STK_ARGS_HERE :: Int
93 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
96 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
98 Now the volatile saves and restores. We add the basic guys to the
99 list of ``user'' registers provided. Note that there are more basic
100 registers on the restore list, because some are reloaded from
103 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
106 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt]
108 volatileSaves = volatileSavesOrRestores True
109 volatileRestores = volatileSavesOrRestores False
111 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
112 restore_cands = save_cands
114 volatileSavesOrRestores do_saves vols
115 = catMaybes (map mkCode vols)
118 | case mid of { BaseReg -> True; _ -> False }
119 = panic "volatileSavesOrRestores:BaseReg"
120 | not (callerSaves mid)
122 | otherwise -- must be callee-saves ...
123 = case get_MagicId_reg_or_addr mid of
124 -- If stored in BaseReg, we ain't interested
128 -- OK, it's callee-saves, and in a real reg (rrno).
129 -- We have to cook up some transfer code.
130 {- Note that the use of (StixMagicId mid) here is a bit subtle.
131 Here, we only create those for MagicIds which are stored in
132 a real reg on this arch -- the preceding case on the result
133 of get_MagicId_reg_or_addr guarantees this. Later, when
134 selecting insns, that means these assignments are sure to turn
135 into real reg-to-mem or mem-to-reg moves, rather than being
136 pointless moves from some address in the reg-table
139 -> Just (StAssignMem rep addr
140 (StReg (StixMagicId mid)))
142 -> Just (StAssignReg rep (StixMagicId mid)
145 rep = magicIdPrimRep mid
146 addr = get_MagicId_addr mid
149 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
151 Obviously slightly weedy
152 (Note that the floating point values aren't terribly important.)
155 targetMinDouble = MachDouble (-1.7976931348623157e+308)
156 targetMaxDouble = MachDouble (1.7976931348623157e+308)
157 targetMinInt = mkMachInt (-2147483648)
158 targetMaxInt = mkMachInt 2147483647
161 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
163 This algorithm for determining the $\log_2$ of exact powers of 2 comes
164 from GCC. It requires bit manipulation primitives, and we use GHC
171 exactLog2 :: Integer -> Maybe Integer
173 = if (x <= 0 || x >= 2147483648) then
176 case iUnbox (fromInteger x) of { x# ->
177 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
180 Just (toInteger (iBox (pow2 x#)))
183 pow2 x# | x# ==# 1# = 0#
184 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
187 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
191 #if alpha_TARGET_ARCH
192 = ALWAYS -- For BI (same as BR)
193 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
195 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
196 | LE -- For CMP and BI
197 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
199 | NEVER -- For BI (null instruction)
200 | ULE -- For CMP only
201 | ULT -- For CMP only
204 = ALWAYS -- What's really used? ToDo
220 #if sparc_TARGET_ARCH
221 = ALWAYS -- What's really used? ToDo
238 deriving Eq -- to make an assertion work
243 #if alpha_TARGET_ARCH
246 -- | W -- word (2 bytes): UNUSED
248 | L -- longword (4 bytes)
249 | Q -- quadword (8 bytes)
250 -- | FF -- VAX F-style floating pt: UNUSED
251 -- | GF -- VAX G-style floating pt: UNUSED
252 -- | DF -- VAX D-style floating pt: UNUSED
253 -- | SF -- IEEE single-precision floating pt: UNUSED
254 | TF -- IEEE double-precision floating pt
258 | Bu -- byte (unsigned)
260 | Wu -- word (unsigned)
261 | L -- longword (signed)
262 | Lu -- longword (unsigned)
263 | F -- IEEE single-precision floating pt
264 | DF -- IEEE single-precision floating pt
265 | F80 -- Intel 80-bit internal FP format; only used for spilling
267 #if sparc_TARGET_ARCH
269 | Bu -- byte (unsigned)
270 | H -- halfword (signed, 2 bytes)
271 | Hu -- halfword (unsigned, 2 bytes)
272 | W -- word (4 bytes)
273 | F -- IEEE single-precision floating pt
274 | DF -- IEEE single-precision floating pt
277 primRepToSize :: PrimRep -> Size
279 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
280 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
281 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
282 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
283 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
284 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
286 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
287 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(H, )))
288 where err = primRepToSize_fail "Int16Rep"
289 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
290 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
291 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(Hu, )))
292 where err = primRepToSize_fail "Word16Rep"
293 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
295 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
296 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
298 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
299 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
300 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
302 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep"
303 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
304 primRepToSize other = primRepToSize_fail (showSDoc (ppr other))
306 primRepToSize_fail str
307 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
308 ++ "Workaround: use -fvia-C.\n\t"
309 ++ "Perhaps you should report it as a GHC bug,\n\t"
310 ++ "to glasgow-haskell-bugs@haskell.org.")
313 %************************************************************************
315 \subsection{Machine's assembly language}
317 %************************************************************************
319 We have a few common ``instructions'' (nearly all the pseudo-ops) but
320 mostly all of @Instr@ is machine-specific.
324 = COMMENT FastString -- comment pseudo-op
325 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
326 | LABEL CLabel -- global label pseudo-op
327 | ASCII Bool -- True <=> needs backslash conversion
328 String -- the literal string
331 | DELTA Int -- specify current stack offset for
332 -- benefit of subsequent passes
336 #if alpha_TARGET_ARCH
338 -- data Instr continues...
342 | LD Size Reg MachRegsAddr -- size, dst, src
343 | LDA Reg MachRegsAddr -- dst, src
344 | LDAH Reg MachRegsAddr -- dst, src
345 | LDGP Reg MachRegsAddr -- dst, src
346 | LDI Size Reg Imm -- size, dst, src
347 | ST Size Reg MachRegsAddr -- size, src, dst
352 | ABS Size RI Reg -- size, src, dst
353 | NEG Size Bool RI Reg -- size, overflow, src, dst
354 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
355 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
356 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
357 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
358 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
359 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
360 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
362 -- Simple bit-twiddling.
382 | CMP Cond Reg RI Reg
389 | FADD Size Reg Reg Reg
390 | FDIV Size Reg Reg Reg
391 | FMUL Size Reg Reg Reg
392 | FSUB Size Reg Reg Reg
393 | CVTxy Size Size Reg Reg
394 | FCMP Size Cond Reg Reg Reg
402 | JMP Reg MachRegsAddr Int
404 | JSR Reg MachRegsAddr Int
406 -- Alpha-specific pseudo-ops.
415 #endif {- alpha_TARGET_ARCH -}
418 Intel, in their infinite wisdom, selected a stack model for floating
419 point registers on x86. That might have made sense back in 1979 --
420 nowadays we can see it for the nonsense it really is. A stack model
421 fits poorly with the existing nativeGen infrastructure, which assumes
422 flat integer and FP register sets. Prior to this commit, nativeGen
423 could not generate correct x86 FP code -- to do so would have meant
424 somehow working the register-stack paradigm into the register
425 allocator and spiller, which sounds very difficult.
427 We have decided to cheat, and go for a simple fix which requires no
428 infrastructure modifications, at the expense of generating ropey but
429 correct FP code. All notions of the x86 FP stack and its insns have
430 been removed. Instead, we pretend (to the instruction selector and
431 register allocator) that x86 has six floating point registers, %fake0
432 .. %fake5, which can be used in the usual flat manner. We further
433 claim that x86 has floating point instructions very similar to SPARC
434 and Alpha, that is, a simple 3-operand register-register arrangement.
435 Code generation and register allocation proceed on this basis.
437 When we come to print out the final assembly, our convenient fiction
438 is converted to dismal reality. Each fake instruction is
439 independently converted to a series of real x86 instructions.
440 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
441 arithmetic operations, the two operands are pushed onto the top of the
442 FP stack, the operation done, and the result copied back into the
443 relevant register. There are only six %fake registers because 2 are
444 needed for the translation, and x86 has 8 in total.
446 The translation is inefficient but is simple and it works. A cleverer
447 translation would handle a sequence of insns, simulating the FP stack
448 contents, would not impose a fixed mapping from %fake to %st regs, and
449 hopefully could avoid most of the redundant reg-reg moves of the
452 We might as well make use of whatever unique FP facilities Intel have
453 chosen to bless us with (let's not be churlish, after all).
454 Hence GLDZ and GLD1. Bwahahahahahahaha!
456 LATER (10 Nov 2000): idiv gives problems with the register spiller,
457 because the spiller is simpleminded and because idiv has fixed uses of
458 %eax and %edx. Rather than make the spiller cleverer, we do away with
459 idiv, and instead have iquot and irem fake (integer) insns, which have
460 no operand register constraints -- ie, they behave like add, sub, mul.
461 The printer-outer transforms them to a sequence of real insns which does
462 the Right Thing (tm). As with the FP stuff, this gives ropey code,
463 but we don't care, since it doesn't get used much. We hope.
468 -- data Instr continues...
472 | MOV Size Operand Operand
473 | MOVZxL Size Operand Operand -- size is the size of operand 1
474 | MOVSxL Size Operand Operand -- size is the size of operand 1
476 -- Load effective address (also a very useful three-operand add instruction :-)
478 | LEA Size Operand Operand
482 | ADD Size Operand Operand
483 | SUB Size Operand Operand
484 | IMUL Size Operand Operand -- signed int mul
485 | MUL Size Operand Operand -- unsigned int mul
486 | IMUL64 Reg Reg -- 32 x 32 -> 64 signed mul
487 -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
489 -- Quotient and remainder. SEE comment above -- these are not
490 -- real x86 insns; instead they are expanded when printed
491 -- into a sequence of real insns.
493 | IQUOT Size Operand Operand -- signed quotient
494 | IREM Size Operand Operand -- signed remainder
495 | QUOT Size Operand Operand -- unsigned quotient
496 | REM Size Operand Operand -- unsigned remainder
498 -- Simple bit-twiddling.
500 | AND Size Operand Operand
501 | OR Size Operand Operand
502 | XOR Size Operand Operand
504 | NEGI Size Operand -- NEG instruction (name clash with Cond)
505 | SHL Size Imm Operand -- Only immediate shifts allowed
506 | SAR Size Imm Operand -- Only immediate shifts allowed
507 | SHR Size Imm Operand -- Only immediate shifts allowed
508 | BT Size Imm Operand
513 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
514 -- as single instructions right up until we spit them out.
516 -- all the 3-operand fake fp insns are src1 src2 dst
517 -- and furthermore are constrained to be fp regs only.
518 -- IMPORTANT: keep is_G_insn up to date with any changes here
519 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
520 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
521 | GST Size Reg MachRegsAddr -- src(fpreg), dst
523 | GLDZ Reg -- dst(fpreg)
524 | GLD1 Reg -- dst(fpreg)
526 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
527 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
529 | GITOF Reg Reg -- src(intreg), dst(fpreg)
530 | GITOD Reg Reg -- src(intreg), dst(fpreg)
532 | GADD Size Reg Reg Reg -- src1, src2, dst
533 | GDIV Size Reg Reg Reg -- src1, src2, dst
534 | GSUB Size Reg Reg Reg -- src1, src2, dst
535 | GMUL Size Reg Reg Reg -- src1, src2, dst
537 -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
538 -- Compare src1 with src2; set the Zero flag iff the numbers are
539 -- comparable and the comparison is True. Subsequent code must
540 -- test the %eflags zero flag regardless of the supplied Cond.
541 | GCMP Cond Reg Reg -- src1, src2
543 | GABS Size Reg Reg -- src, dst
544 | GNEG Size Reg Reg -- src, dst
545 | GSQRT Size Reg Reg -- src, dst
546 | GSIN Size Reg Reg -- src, dst
547 | GCOS Size Reg Reg -- src, dst
548 | GTAN Size Reg Reg -- src, dst
550 | GFREE -- do ffree on all x86 regs; an ugly hack
553 | TEST Size Operand Operand
554 | CMP Size Operand Operand
566 | JMP DestInfo Operand -- possible dests, target
567 | JXX Cond CLabel -- target
568 | CALL (Either Imm Reg)
572 | CLTD -- sign extend %eax into %edx:%eax
575 = OpReg Reg -- register
576 | OpImm Imm -- immediate value
577 | OpAddr MachRegsAddr -- memory reference
580 i386_insert_ffrees :: [Instr] -> [Instr]
581 i386_insert_ffrees insns
582 | any is_G_instr insns
583 = concatMap ffree_before_nonlocal_transfers insns
587 ffree_before_nonlocal_transfers insn
589 CALL _ -> [GFREE, insn]
590 -- Jumps to immediate labels are local
591 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
592 -- If a jump mentions dests, it is a local jump thru
594 JMP (DestInfo _) _ -> [insn]
595 JMP _ _ -> [GFREE, insn]
599 -- if you ever add a new FP insn to the fake x86 FP insn set,
600 -- you must update this too
601 is_G_instr :: Instr -> Bool
604 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
605 GLDZ _ -> True; GLD1 _ -> True;
606 GFTOI _ _ -> True; GDTOI _ _ -> True;
607 GITOF _ _ -> True; GITOD _ _ -> True;
608 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
609 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
610 GCMP _ _ _ -> True; GABS _ _ _ -> True
611 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
612 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
613 GFREE -> panic "is_G_instr: GFREE (!)"
616 #endif {- i386_TARGET_ARCH -}
620 #if sparc_TARGET_ARCH
622 -- data Instr continues...
626 | LD Size MachRegsAddr Reg -- size, src, dst
627 | ST Size Reg MachRegsAddr -- size, src, dst
631 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
632 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
633 | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
634 | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
635 | RDY Reg -- move contents of Y register to reg
637 -- Simple bit-twiddling.
639 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
640 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
641 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
642 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
643 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
644 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
645 | SLL Reg RI Reg -- src1, src2, dst
646 | SRL Reg RI Reg -- src1, src2, dst
647 | SRA Reg RI Reg -- src1, src2, dst
648 | SETHI Imm Reg -- src, dst
649 | NOP -- Really SETHI 0, %g0, but worth an alias
653 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
654 -- right up until we spit them out.
656 | FABS Size Reg Reg -- src dst
657 | FADD Size Reg Reg Reg -- src1, src2, dst
658 | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
659 | FDIV Size Reg Reg Reg -- src1, src2, dst
660 | FMOV Size Reg Reg -- src, dst
661 | FMUL Size Reg Reg Reg -- src1, src2, dst
662 | FNEG Size Reg Reg -- src, dst
663 | FSQRT Size Reg Reg -- src, dst
664 | FSUB Size Reg Reg Reg -- src1, src2, dst
665 | FxTOy Size Size Reg Reg -- src, dst
669 | BI Cond Bool Imm -- cond, annul?, target
670 | BF Cond Bool Imm -- cond, annul?, target
672 | JMP DestInfo MachRegsAddr -- target
673 | CALL (Either Imm Reg) Int Bool -- target, args, terminal
680 riZero (RIImm (ImmInt 0)) = True
681 riZero (RIImm (ImmInteger 0)) = True
682 riZero (RIReg (RealReg 0)) = True
685 -- Calculate the effective address which would be used by the
686 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
687 -- alas -- can't have fpRelEA here because of module dependencies.
688 fpRelEA :: Int -> Reg -> Instr
690 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
692 -- Code to shift the stack pointer by n words.
693 moveSp :: Int -> Instr
695 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
697 -- Produce the second-half-of-a-double register given the first half.
699 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
700 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
701 #endif {- sparc_TARGET_ARCH -}