2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( callerSaves,
46 get_MagicId_addr, get_MagicId_reg_or_addr,
47 Imm(..), Reg(..), MachRegsAddr(..)
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixStmt(..), StixExpr(..), StixReg(..),
54 CodeSegment, DestInfo(..) )
55 import Panic ( panic )
57 import Outputable ( pprPanic, ppr, showSDoc )
58 import IOExts ( trace )
59 import Config ( cLeadingUnderscore )
62 import Maybe ( catMaybes )
66 underscorePrefix :: Bool -- True <=> assembler labels carry a leading underscore
67 underscorePrefix = (cLeadingUnderscore == "YES") -- cLeadingUnderscore is the string imported from Config
69 ---------------------------
70 fmtAsmLbl :: String -> String -- for formatting labels
74 {- The alpha assembler likes temporary labels to look like $L123
75 instead of L123. (Don't toss the L, because then Lf28
84 % ----------------------------------------------------------------
86 We (allegedly) put the first six C-call arguments in registers;
87 where do we start putting the rest of them?
89 eXTRA_STK_ARGS_HERE :: Int
91 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96 Now the volatile saves and restores. We add the basic guys to the
97 list of ``user'' registers provided. Note that there are more basic
98 registers on the restore list, because some are reloaded from
101 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
104 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt] -- argument: the MagicIds to consider; result: the generated Stix statements
106 volatileSaves = volatileSavesOrRestores True -- shared worker, with its do_saves flag set
107 volatileRestores = volatileSavesOrRestores False -- same worker, with do_saves cleared
109 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim] -- candidate MagicIds; NOTE(review): no use is visible in this excerpt, confirm callers
110 restore_cands = save_cands -- identical to the save list, although the prose above describes a longer restore list
112 volatileSavesOrRestores do_saves vols
113 = catMaybes (map mkCode vols)
116 | not (callerSaves mid)
118 | otherwise -- must be caller-saves ...
119 = case get_MagicId_reg_or_addr mid of
120 -- If stored in BaseReg, we ain't interested
124 -- OK, it's caller-saves, and in a real reg (rrno).
125 -- We have to cook up some transfer code.
126 {- Note that the use of (StixMagicId mid) here is a bit subtle.
127 Here, we only create those for MagicIds which are stored in
128 a real reg on this arch -- the preceding case on the result
129 of get_MagicId_reg_or_addr guarantees this. Later, when
130 selecting insns, that means these assignments are sure to turn
131 into real reg-to-mem or mem-to-reg moves, rather than being
132 pointless moves from some address in the reg-table
135 -> Just (StAssignMem rep addr
136 (StReg (StixMagicId mid)))
138 -> Just (StAssignReg rep (StixMagicId mid)
141 rep = magicIdPrimRep mid
142 addr = get_MagicId_addr mid
145 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
147 Obviously slightly weedy
148 (Note that the floating point values aren't terribly important.)
151 targetMinDouble = MachDouble (-1.7976931348623157e+308) -- negation of targetMaxDouble (most negative finite value, not the smallest positive one)
152 targetMaxDouble = MachDouble (1.7976931348623157e+308) -- largest finite IEEE double-precision value
153 targetMinInt = mkMachInt (-2147483648) -- -(2^31); hard-wired 32-bit range, not selected per target arch
154 targetMaxInt = mkMachInt 2147483647 -- 2^31 - 1; same 32-bit assumption
157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 This algorithm for determining the $\log_2$ of exact powers of 2 comes
160 from GCC. It requires bit manipulation primitives, and we use GHC
167 exactLog2 :: Integer -> Maybe Integer
169 = if (x <= 0 || x >= 2147483648) then
172 case iUnbox (fromInteger x) of { x# ->
173 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
176 Just (toInteger (iBox (pow2 x#)))
179 #if __GLASGOW_HASKELL__ >= 503
180 shiftr x y = uncheckedShiftRL# x y
182 shiftr x y = shiftRL# x y
185 pow2 x# | x# ==# 1# = 0#
186 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
189 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
193 #if alpha_TARGET_ARCH
194 = ALWAYS -- For BI (same as BR)
195 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
197 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
198 | LE -- For CMP and BI
199 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
201 | NEVER -- For BI (null instruction)
202 | ULE -- For CMP only
203 | ULT -- For CMP only
206 = ALWAYS -- What's really used? ToDo
222 #if sparc_TARGET_ARCH
223 = ALWAYS -- What's really used? ToDo
244 #if alpha_TARGET_ARCH
247 -- | W -- word (2 bytes): UNUSED
249 | L -- longword (4 bytes)
250 | Q -- quadword (8 bytes)
251 -- | FF -- VAX F-style floating pt: UNUSED
252 -- | GF -- VAX G-style floating pt: UNUSED
253 -- | DF -- VAX D-style floating pt: UNUSED
254 -- | SF -- IEEE single-precision floating pt: UNUSED
255 | TF -- IEEE double-precision floating pt
259 | Bu -- byte (unsigned)
261 | Wu -- word (unsigned)
262 | L -- longword (signed)
263 | Lu -- longword (unsigned)
264 | F -- IEEE single-precision floating pt
265 | DF -- IEEE double-precision floating pt
266 | F80 -- Intel 80-bit internal FP format; only used for spilling
268 #if sparc_TARGET_ARCH
270 | Bu -- byte (unsigned)
271 | W -- word (4 bytes)
272 | F -- IEEE single-precision floating pt
273 | DF -- IEEE double-precision floating pt
276 primRepToSize :: PrimRep -> Size -- operand Size for each PrimRep; the IF_ARCH_* macros presumably select the alpha / i386 / sparc entry (defined in NCG.h, not shown here)
278 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
279 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
280 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
281 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
282 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
283 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
285 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
286 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(err,)))
287 where err = primRepToSize_fail "Int16Rep" -- used for the alpha and sparc entries of the equation above
288 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
289 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
290 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
291 where err = primRepToSize_fail "Word16Rep" -- used for the alpha and sparc entries of the equation above
292 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, ))) -- only the i386 entry uses an unsigned size here
294 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
295 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
296 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
298 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
299 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
300 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
301 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
302 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
303 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
304 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
305 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
306 primRepToSize StableNameRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
307 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
309 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep" -- 64-bit reps are rejected outright, with no per-arch entry
310 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
311 primRepToSize other = primRepToSize_fail (showSDoc (ppr other)) -- any remaining rep: fail, naming it by its pretty-printed form
313 primRepToSize_fail str -- str: name of the PrimRep that could not be mapped; never returns normally (calls error)
314 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
315 ++ "Workaround: use -fvia-C.\n\t"
316 ++ "Perhaps you should report it as a GHC bug,\n\t"
317 ++ "to glasgow-haskell-bugs@haskell.org.")
321 %************************************************************************
323 \subsection{Machine's assembly language}
325 %************************************************************************
327 We have a few common ``instructions'' (nearly all the pseudo-ops) but
328 mostly all of @Instr@ is machine-specific.
332 = COMMENT FAST_STRING -- comment pseudo-op
333 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
334 | LABEL CLabel -- global label pseudo-op
335 | ASCII Bool -- True <=> needs backslash conversion
336 String -- the literal string
339 | DELTA Int -- specify current stack offset for
340 -- benefit of subsequent passes
344 #if alpha_TARGET_ARCH
346 -- data Instr continues...
350 | LD Size Reg MachRegsAddr -- size, dst, src
351 | LDA Reg MachRegsAddr -- dst, src
352 | LDAH Reg MachRegsAddr -- dst, src
353 | LDGP Reg MachRegsAddr -- dst, src
354 | LDI Size Reg Imm -- size, dst, src
355 | ST Size Reg MachRegsAddr -- size, src, dst
360 | ABS Size RI Reg -- size, src, dst
361 | NEG Size Bool RI Reg -- size, overflow, src, dst
362 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
363 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
364 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
365 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
366 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
367 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
368 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
370 -- Simple bit-twiddling.
390 | CMP Cond Reg RI Reg
397 | FADD Size Reg Reg Reg
398 | FDIV Size Reg Reg Reg
399 | FMUL Size Reg Reg Reg
400 | FSUB Size Reg Reg Reg
401 | CVTxy Size Size Reg Reg
402 | FCMP Size Cond Reg Reg Reg
410 | JMP Reg MachRegsAddr Int
412 | JSR Reg MachRegsAddr Int
414 -- Alpha-specific pseudo-ops.
423 #endif {- alpha_TARGET_ARCH -}
426 Intel, in their infinite wisdom, selected a stack model for floating
427 point registers on x86. That might have made sense back in 1979 --
428 nowadays we can see it for the nonsense it really is. A stack model
429 fits poorly with the existing nativeGen infrastructure, which assumes
430 flat integer and FP register sets. Prior to this commit, nativeGen
431 could not generate correct x86 FP code -- to do so would have meant
432 somehow working the register-stack paradigm into the register
433 allocator and spiller, which sounds very difficult.
435 We have decided to cheat, and go for a simple fix which requires no
436 infrastructure modifications, at the expense of generating ropey but
437 correct FP code. All notions of the x86 FP stack and its insns have
438 been removed. Instead, we pretend (to the instruction selector and
439 register allocator) that x86 has six floating point registers, %fake0
440 .. %fake5, which can be used in the usual flat manner. We further
441 claim that x86 has floating point instructions very similar to SPARC
442 and Alpha, that is, a simple 3-operand register-register arrangement.
443 Code generation and register allocation proceed on this basis.
445 When we come to print out the final assembly, our convenient fiction
446 is converted to dismal reality. Each fake instruction is
447 independently converted to a series of real x86 instructions.
448 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
449 arithmetic operations, the two operands are pushed onto the top of the
450 FP stack, the operation done, and the result copied back into the
451 relevant register. There are only six %fake registers because 2 are
452 needed for the translation, and x86 has 8 in total.
454 The translation is inefficient but is simple and it works. A cleverer
455 translation would handle a sequence of insns, simulating the FP stack
456 contents, would not impose a fixed mapping from %fake to %st regs, and
457 hopefully could avoid most of the redundant reg-reg moves of the
460 We might as well make use of whatever unique FP facilities Intel have
461 chosen to bless us with (let's not be churlish, after all).
462 Hence GLDZ and GLD1. Bwahahahahahahaha!
464 LATER (10 Nov 2000): idiv gives problems with the register spiller,
465 because the spiller is simpleminded and because idiv has fixed uses of
466 %eax and %edx. Rather than make the spiller cleverer, we do away with
467 idiv, and instead have iquot and irem fake (integer) insns, which have
468 no operand register constraints -- ie, they behave like add, sub, mul.
469 The printer-outer transforms them to a sequence of real insns which does
470 the Right Thing (tm). As with the FP stuff, this gives ropey code,
471 but we don't care, since it doesn't get used much. We hope.
476 -- data Instr continues...
480 | MOV Size Operand Operand
481 | MOVZxL Size Operand Operand -- size is the size of operand 1
482 | MOVSxL Size Operand Operand -- size is the size of operand 1
484 -- Load effective address (also a very useful three-operand add instruction :-)
486 | LEA Size Operand Operand
490 | ADD Size Operand Operand
491 | SUB Size Operand Operand
492 | IMUL Size Operand Operand -- signed int mul
493 | MUL Size Operand Operand -- unsigned int mul
495 -- Quotient and remainder. SEE comment above -- these are not
496 -- real x86 insns; instead they are expanded when printed
497 -- into a sequence of real insns.
499 | IQUOT Size Operand Operand -- signed quotient
500 | IREM Size Operand Operand -- signed remainder
501 | QUOT Size Operand Operand -- unsigned quotient
502 | REM Size Operand Operand -- unsigned remainder
504 -- Simple bit-twiddling.
506 | AND Size Operand Operand
507 | OR Size Operand Operand
508 | XOR Size Operand Operand
510 | NEGI Size Operand -- NEG instruction (name clash with Cond)
511 | SHL Size Imm Operand -- Only immediate shifts allowed
512 | SAR Size Imm Operand -- Only immediate shifts allowed
513 | SHR Size Imm Operand -- Only immediate shifts allowed
514 | BT Size Imm Operand
519 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
520 -- as single instructions right up until we spit them out.
522 -- all the 3-operand fake fp insns are src1 src2 dst
523 -- and furthermore are constrained to be fp regs only.
524 -- IMPORTANT: keep is_G_instr up to date with any changes here
525 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
526 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
527 | GST Size Reg MachRegsAddr -- src(fpreg), dst
529 | GLDZ Reg -- dst(fpreg)
530 | GLD1 Reg -- dst(fpreg)
532 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
533 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
535 | GITOF Reg Reg -- src(intreg), dst(fpreg)
536 | GITOD Reg Reg -- src(intreg), dst(fpreg)
538 | GADD Size Reg Reg Reg -- src1, src2, dst
539 | GDIV Size Reg Reg Reg -- src1, src2, dst
540 | GSUB Size Reg Reg Reg -- src1, src2, dst
541 | GMUL Size Reg Reg Reg -- src1, src2, dst
543 | GCMP Size Reg Reg -- src1, src2
545 | GABS Size Reg Reg -- src, dst
546 | GNEG Size Reg Reg -- src, dst
547 | GSQRT Size Reg Reg -- src, dst
548 | GSIN Size Reg Reg -- src, dst
549 | GCOS Size Reg Reg -- src, dst
550 | GTAN Size Reg Reg -- src, dst
552 | GFREE -- do ffree on all x86 regs; an ugly hack
555 | TEST Size Operand Operand
556 | CMP Size Operand Operand
568 | JMP DestInfo Operand -- possible dests, target
569 | JXX Cond CLabel -- target
574 | CLTD -- sign extend %eax into %edx:%eax
577 = OpReg Reg -- register
578 | OpImm Imm -- immediate value
579 | OpAddr MachRegsAddr -- memory reference
582 i386_insert_ffrees :: [Instr] -> [Instr]
583 i386_insert_ffrees insns
584 | any is_G_instr insns
585 = concatMap ffree_before_nonlocal_transfers insns
589 ffree_before_nonlocal_transfers insn
591 CALL _ -> [GFREE, insn]
592 -- Jumps to immediate labels are local
593 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
594 -- If a jump mentions dests, it is a local jump thru
596 JMP (DestInfo _) _ -> [insn]
597 JMP _ _ -> [GFREE, insn]
601 -- if you ever add a new FP insn to the fake x86 FP insn set,
602 -- you must update this too
603 is_G_instr :: Instr -> Bool
606 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
607 GLDZ _ -> True; GLD1 _ -> True;
608 GFTOI _ _ -> True; GDTOI _ _ -> True;
609 GITOF _ _ -> True; GITOD _ _ -> True;
610 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
611 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
612 GCMP _ _ _ -> True; GABS _ _ _ -> True
613 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
614 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
615 GFREE -> panic "is_G_instr: GFREE (!)"
618 #endif {- i386_TARGET_ARCH -}
622 #if sparc_TARGET_ARCH
624 -- data Instr continues...
628 | LD Size MachRegsAddr Reg -- size, src, dst
629 | ST Size Reg MachRegsAddr -- size, src, dst
633 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
634 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
636 -- Simple bit-twiddling.
638 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
639 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
640 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
641 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
642 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
643 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
644 | SLL Reg RI Reg -- src1, src2, dst
645 | SRL Reg RI Reg -- src1, src2, dst
646 | SRA Reg RI Reg -- src1, src2, dst
647 | SETHI Imm Reg -- src, dst
648 | NOP -- Really SETHI 0, %g0, but worth an alias
652 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
653 -- right up until we spit them out.
655 | FABS Size Reg Reg -- src dst
656 | FADD Size Reg Reg Reg -- src1, src2, dst
657 | FCMP Bool Size Reg Reg -- exception?, src1, src2
658 | FDIV Size Reg Reg Reg -- src1, src2, dst
659 | FMOV Size Reg Reg -- src, dst
660 | FMUL Size Reg Reg Reg -- src1, src2, dst
661 | FNEG Size Reg Reg -- src, dst
662 | FSQRT Size Reg Reg -- src, dst
663 | FSUB Size Reg Reg Reg -- src1, src2, dst
664 | FxTOy Size Size Reg Reg -- src, dst
668 | BI Cond Bool Imm -- cond, annul?, target
669 | BF Cond Bool Imm -- cond, annul?, target
671 | JMP DestInfo MachRegsAddr -- target
672 | CALL Imm Int Bool -- target, args, terminal
679 riZero (RIImm (ImmInt 0)) = True
680 riZero (RIImm (ImmInteger 0)) = True
681 riZero (RIReg (RealReg 0)) = True
684 -- Calculate the effective address which would be used by the
685 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
686 -- alas -- can't have fpRelEA here because of module dependencies.
687 fpRelEA :: Int -> Reg -> Instr
689 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
691 -- Code to shift the stack pointer by n words.
692 moveSp :: Int -> Instr
694 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
696 -- Produce the second-half-of-a-double register given the first half.
698 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
699 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
700 #endif {- sparc_TARGET_ARCH -}