2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( callerSaves,
46 get_MagicId_addr, get_MagicId_reg_or_addr,
47 Imm(..), Reg(..), MachRegsAddr(..)
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixStmt(..), StixExpr(..), StixReg(..),
54 CodeSegment, DestInfo(..) )
55 import Panic ( panic )
57 import Outputable ( pprPanic, ppr, showSDoc )
58 import IOExts ( trace )
59 import Config ( cLeadingUnderscore )
62 import Maybe ( catMaybes )
-- Whether assembler labels get a leading underscore.  The answer is read
-- from the build-time setting cLeadingUnderscore (imported from Config) and
-- is True exactly when that setting is the string "YES".
66 underscorePrefix :: Bool -- leading underscore on assembler labels?
67 underscorePrefix = (cLeadingUnderscore == "YES")
69 ---------------------------
70 fmtAsmLbl :: String -> String -- for formatting labels
74 {- The alpha assembler likes temporary labels to look like $L123
75 instead of L123. (Don't toss the L, because then Lf28
84 % ----------------------------------------------------------------
86 We (allegedly) put the first six C-call arguments in registers;
87 where do we start putting the rest of them?
89 eXTRA_STK_ARGS_HERE :: Int
91 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96 Now the volatile saves and restores. We add the basic guys to the
97 list of ``user'' registers provided. Note that there are more basic
98 registers on the restore list, because some are reloaded from
101 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
-- Entry points producing the save (volatileSaves) and restore
-- (volatileRestores) statements for a list of MagicIds.
104 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt]
-- Both are volatileSavesOrRestores with its do_saves flag fixed:
-- True for saves, False for restores.
106 volatileSaves = volatileSavesOrRestores True
107 volatileRestores = volatileSavesOrRestores False
-- Candidate "basic" registers.
-- NOTE(review): restore_cands is defined as exactly save_cands, although the
-- surrounding prose says the restore list contains more registers than the
-- save list -- confirm which is intended.
109 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
110 restore_cands = save_cands
112 volatileSavesOrRestores do_saves vols
113 = catMaybes (map mkCode vols)
116 | not (callerSaves mid)
118 | otherwise -- must be callee-saves ...
119 = case get_MagicId_reg_or_addr mid of
120 -- If stored in BaseReg, we ain't interested
124 -- OK, it's callee-saves, and in a real reg (rrno).
125 -- We have to cook up some transfer code.
126 {- Note that the use of (StixMagicId mid) here is a bit subtle.
127 Here, we only create those for MagicIds which are stored in
128 a real reg on this arch -- the preceding case on the result
129 of get_MagicId_reg_or_addr guarantees this. Later, when
130 selecting insns, that means these assignments are sure to turn
131 into real reg-to-mem or mem-to-reg moves, rather than being
132 pointless moves from some address in the reg-table
135 -> Just (StAssignMem rep addr
136 (StReg (StixMagicId mid)))
138 -> Just (StAssignReg rep (StixMagicId mid)
141 rep = magicIdPrimRep mid
142 addr = get_MagicId_addr mid
145 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
147 Obviously slightly weedy
148 (Note that the floating point values aren't terribly important.)
-- Numeric limits, built with MachDouble and mkMachInt.  The values are
-- written out literally and are not conditional on the target architecture:
-- the doubles are -/+ 1.7976931348623157e+308, and the ints are
-- -2147483648 (-2^31) and 2147483647 (2^31 - 1).
151 targetMinDouble = MachDouble (-1.7976931348623157e+308)
152 targetMaxDouble = MachDouble (1.7976931348623157e+308)
153 targetMinInt = mkMachInt (-2147483648)
154 targetMaxInt = mkMachInt 2147483647
157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 This algorithm for determining the $\log_2$ of exact powers of 2 comes
160 from GCC. It requires bit manipulation primitives, and we use GHC
167 exactLog2 :: Integer -> Maybe Integer
169 = if (x <= 0 || x >= 2147483648) then
172 case iUnbox (fromInteger x) of { x# ->
173 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
176 Just (toInteger (iBox (pow2 x#)))
179 pow2 x# | x# ==# 1# = 0#
180 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
182 #if __GLASGOW_HASKELL__ >= 503
183 shiftr x y = uncheckedShiftRL# x y
185 shiftr x y = shiftRL# x y
189 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
193 #if alpha_TARGET_ARCH
194 = ALWAYS -- For BI (same as BR)
195 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
197 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
198 | LE -- For CMP and BI
199 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
201 | NEVER -- For BI (null instruction)
202 | ULE -- For CMP only
203 | ULT -- For CMP only
206 = ALWAYS -- What's really used? ToDo
222 #if sparc_TARGET_ARCH
223 = ALWAYS -- What's really used? ToDo
244 #if alpha_TARGET_ARCH
247 -- | W -- word (2 bytes): UNUSED
249 | L -- longword (4 bytes)
250 | Q -- quadword (8 bytes)
251 -- | FF -- VAX F-style floating pt: UNUSED
252 -- | GF -- VAX G-style floating pt: UNUSED
253 -- | DF -- VAX D-style floating pt: UNUSED
254 -- | SF -- IEEE single-precision floating pt: UNUSED
255 | TF -- IEEE double-precision floating pt
259 | Bu -- byte (unsigned)
261 | Wu -- word (unsigned)
262 | L -- longword (signed)
263 | Lu -- longword (unsigned)
264 | F -- IEEE single-precision floating pt
265 | DF -- IEEE double-precision floating pt
266 | F80 -- Intel 80-bit internal FP format; only used for spilling
268 #if sparc_TARGET_ARCH
270 | Bu -- byte (unsigned)
271 | H -- halfword (signed, 2 bytes)
272 | Hu -- halfword (unsigned, 2 bytes)
273 | W -- word (4 bytes)
274 | F -- IEEE single-precision floating pt
275 | DF -- IEEE double-precision floating pt
-- Map a PrimRep to the Size used for it on the target being compiled for.
-- Each right-hand side nests the IF_ARCH_alpha / IF_ARCH_i386 / IF_ARCH_sparc
-- macros, giving one Size per architecture (presumably these macros come
-- from nativeGen/NCG.h and keep only the branch for the configured target
-- -- TODO confirm).
-- Reps with no Size are routed to primRepToSize_fail: Int16Rep and Word16Rep
-- on alpha, Word64Rep and Int64Rep on every target, and any rep not listed.
278 primRepToSize :: PrimRep -> Size
280 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
281 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
282 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
283 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
284 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
285 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
-- Fixed-width integer reps.  The 16-bit reps have no alpha Size here, so the
-- alpha branch is the failure value bound as err in the where clause.
287 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
288 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(H, )))
289 where err = primRepToSize_fail "Int16Rep"
290 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
291 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
292 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(Hu, )))
293 where err = primRepToSize_fail "Word16Rep"
294 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
296 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
298 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
299 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
300 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
301 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
302 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
303 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
304 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
305 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
306 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
307 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
308 primRepToSize StableNameRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
309 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
-- Word64Rep and Int64Rep are rejected on every target.
311 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep"
312 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
-- Catch-all: report any remaining rep using its pretty-printed form.
313 primRepToSize other = primRepToSize_fail (showSDoc (ppr other))
-- Abort via error, naming the PrimRep (passed in as a string) that
-- primRepToSize could not handle.  The message also suggests -fvia-C as a
-- workaround and gives the address for reporting the problem as a GHC bug.
315 primRepToSize_fail str
316 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
317 ++ "Workaround: use -fvia-C.\n\t"
318 ++ "Perhaps you should report it as a GHC bug,\n\t"
319 ++ "to glasgow-haskell-bugs@haskell.org.")
323 %************************************************************************
325 \subsection{Machine's assembly language}
327 %************************************************************************
329 We have a few common ``instructions'' (nearly all the pseudo-ops) but
330 mostly all of @Instr@ is machine-specific.
334 = COMMENT FAST_STRING -- comment pseudo-op
335 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
336 | LABEL CLabel -- global label pseudo-op
337 | ASCII Bool -- True <=> needs backslash conversion
338 String -- the literal string
341 | DELTA Int -- specify current stack offset for
342 -- benefit of subsequent passes
346 #if alpha_TARGET_ARCH
348 -- data Instr continues...
352 | LD Size Reg MachRegsAddr -- size, dst, src
353 | LDA Reg MachRegsAddr -- dst, src
354 | LDAH Reg MachRegsAddr -- dst, src
355 | LDGP Reg MachRegsAddr -- dst, src
356 | LDI Size Reg Imm -- size, dst, src
357 | ST Size Reg MachRegsAddr -- size, src, dst
362 | ABS Size RI Reg -- size, src, dst
363 | NEG Size Bool RI Reg -- size, overflow, src, dst
364 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
365 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
366 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
367 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
368 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
369 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
370 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
372 -- Simple bit-twiddling.
392 | CMP Cond Reg RI Reg
399 | FADD Size Reg Reg Reg
400 | FDIV Size Reg Reg Reg
401 | FMUL Size Reg Reg Reg
402 | FSUB Size Reg Reg Reg
403 | CVTxy Size Size Reg Reg
404 | FCMP Size Cond Reg Reg Reg
412 | JMP Reg MachRegsAddr Int
414 | JSR Reg MachRegsAddr Int
416 -- Alpha-specific pseudo-ops.
425 #endif {- alpha_TARGET_ARCH -}
428 Intel, in their infinite wisdom, selected a stack model for floating
429 point registers on x86. That might have made sense back in 1979 --
430 nowadays we can see it for the nonsense it really is. A stack model
431 fits poorly with the existing nativeGen infrastructure, which assumes
432 flat integer and FP register sets. Prior to this commit, nativeGen
433 could not generate correct x86 FP code -- to do so would have meant
434 somehow working the register-stack paradigm into the register
435 allocator and spiller, which sounds very difficult.
437 We have decided to cheat, and go for a simple fix which requires no
438 infrastructure modifications, at the expense of generating ropey but
439 correct FP code. All notions of the x86 FP stack and its insns have
440 been removed. Instead, we pretend (to the instruction selector and
441 register allocator) that x86 has six floating point registers, %fake0
442 .. %fake5, which can be used in the usual flat manner. We further
443 claim that x86 has floating point instructions very similar to SPARC
444 and Alpha, that is, a simple 3-operand register-register arrangement.
445 Code generation and register allocation proceed on this basis.
447 When we come to print out the final assembly, our convenient fiction
448 is converted to dismal reality. Each fake instruction is
449 independently converted to a series of real x86 instructions.
450 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
451 arithmetic operations, the two operands are pushed onto the top of the
452 FP stack, the operation done, and the result copied back into the
453 relevant register. There are only six %fake registers because 2 are
454 needed for the translation, and x86 has 8 in total.
456 The translation is inefficient but is simple and it works. A cleverer
457 translation would handle a sequence of insns, simulating the FP stack
458 contents, would not impose a fixed mapping from %fake to %st regs, and
459 hopefully could avoid most of the redundant reg-reg moves of the
462 We might as well make use of whatever unique FP facilities Intel have
463 chosen to bless us with (let's not be churlish, after all).
464 Hence GLDZ and GLD1. Bwahahahahahahaha!
466 LATER (10 Nov 2000): idiv gives problems with the register spiller,
467 because the spiller is simpleminded and because idiv has fixed uses of
468 %eax and %edx. Rather than make the spiller cleverer, we do away with
469 idiv, and instead have iquot and irem fake (integer) insns, which have
470 no operand register constraints -- ie, they behave like add, sub, mul.
471 The printer-outer transforms them to a sequence of real insns which does
472 the Right Thing (tm). As with the FP stuff, this gives ropey code,
473 but we don't care, since it doesn't get used much. We hope.
478 -- data Instr continues...
482 | MOV Size Operand Operand
483 | MOVZxL Size Operand Operand -- size is the size of operand 1
484 | MOVSxL Size Operand Operand -- size is the size of operand 1
486 -- Load effective address (also a very useful three-operand add instruction :-)
488 | LEA Size Operand Operand
492 | ADD Size Operand Operand
493 | SUB Size Operand Operand
494 | IMUL Size Operand Operand -- signed int mul
495 | MUL Size Operand Operand -- unsigned int mul
496 | IMUL64 Reg Reg -- 32 x 32 -> 64 signed mul
497 -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
499 -- Quotient and remainder. SEE comment above -- these are not
500 -- real x86 insns; instead they are expanded when printed
501 -- into a sequence of real insns.
503 | IQUOT Size Operand Operand -- signed quotient
504 | IREM Size Operand Operand -- signed remainder
505 | QUOT Size Operand Operand -- unsigned quotient
506 | REM Size Operand Operand -- unsigned remainder
508 -- Simple bit-twiddling.
510 | AND Size Operand Operand
511 | OR Size Operand Operand
512 | XOR Size Operand Operand
514 | NEGI Size Operand -- NEG instruction (name clash with Cond)
515 | SHL Size Imm Operand -- Only immediate shifts allowed
516 | SAR Size Imm Operand -- Only immediate shifts allowed
517 | SHR Size Imm Operand -- Only immediate shifts allowed
518 | BT Size Imm Operand
523 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
524 -- as single instructions right up until we spit them out.
526 -- all the 3-operand fake fp insns are src1 src2 dst
527 -- and furthermore are constrained to be fp regs only.
528 -- IMPORTANT: keep is_G_insn up to date with any changes here
529 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
530 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
531 | GST Size Reg MachRegsAddr -- src(fpreg), dst
533 | GLDZ Reg -- dst(fpreg)
534 | GLD1 Reg -- dst(fpreg)
536 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
537 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
539 | GITOF Reg Reg -- src(intreg), dst(fpreg)
540 | GITOD Reg Reg -- src(intreg), dst(fpreg)
542 | GADD Size Reg Reg Reg -- src1, src2, dst
543 | GDIV Size Reg Reg Reg -- src1, src2, dst
544 | GSUB Size Reg Reg Reg -- src1, src2, dst
545 | GMUL Size Reg Reg Reg -- src1, src2, dst
547 | GCMP Size Reg Reg -- src1, src2
549 | GABS Size Reg Reg -- src, dst
550 | GNEG Size Reg Reg -- src, dst
551 | GSQRT Size Reg Reg -- src, dst
552 | GSIN Size Reg Reg -- src, dst
553 | GCOS Size Reg Reg -- src, dst
554 | GTAN Size Reg Reg -- src, dst
556 | GFREE -- do ffree on all x86 regs; an ugly hack
559 | TEST Size Operand Operand
560 | CMP Size Operand Operand
572 | JMP DestInfo Operand -- possible dests, target
573 | JXX Cond CLabel -- target
578 | CLTD -- sign extend %eax into %edx:%eax
581 = OpReg Reg -- register
582 | OpImm Imm -- immediate value
583 | OpAddr MachRegsAddr -- memory reference
586 i386_insert_ffrees :: [Instr] -> [Instr]
587 i386_insert_ffrees insns
588 | any is_G_instr insns
589 = concatMap ffree_before_nonlocal_transfers insns
593 ffree_before_nonlocal_transfers insn
595 CALL _ -> [GFREE, insn]
596 -- Jumps to immediate labels are local
597 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
598 -- If a jump mentions dests, it is a local jump thru
600 JMP (DestInfo _) _ -> [insn]
601 JMP _ _ -> [GFREE, insn]
605 -- if you ever add a new FP insn to the fake x86 FP insn set,
606 -- you must update this too
607 is_G_instr :: Instr -> Bool
610 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
611 GLDZ _ -> True; GLD1 _ -> True;
612 GFTOI _ _ -> True; GDTOI _ _ -> True;
613 GITOF _ _ -> True; GITOD _ _ -> True;
614 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
615 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
616 GCMP _ _ _ -> True; GABS _ _ _ -> True
617 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
618 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
619 GFREE -> panic "is_G_instr: GFREE (!)"
622 #endif {- i386_TARGET_ARCH -}
626 #if sparc_TARGET_ARCH
628 -- data Instr continues...
632 | LD Size MachRegsAddr Reg -- size, src, dst
633 | ST Size Reg MachRegsAddr -- size, src, dst
637 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
638 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
640 -- Simple bit-twiddling.
642 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
643 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
644 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
645 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
646 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
647 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
648 | SLL Reg RI Reg -- src1, src2, dst
649 | SRL Reg RI Reg -- src1, src2, dst
650 | SRA Reg RI Reg -- src1, src2, dst
651 | SETHI Imm Reg -- src, dst
652 | NOP -- Really SETHI 0, %g0, but worth an alias
656 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
657 -- right up until we spit them out.
659 | FABS Size Reg Reg -- src dst
660 | FADD Size Reg Reg Reg -- src1, src2, dst
661 | FCMP Bool Size Reg Reg -- exception?, src1, src2
662 | FDIV Size Reg Reg Reg -- src1, src2, dst
663 | FMOV Size Reg Reg -- src, dst
664 | FMUL Size Reg Reg Reg -- src1, src2, dst
665 | FNEG Size Reg Reg -- src, dst
666 | FSQRT Size Reg Reg -- src, dst
667 | FSUB Size Reg Reg Reg -- src1, src2, dst
668 | FxTOy Size Size Reg Reg -- src, dst
672 | BI Cond Bool Imm -- cond, annul?, target
673 | BF Cond Bool Imm -- cond, annul?, target
675 | JMP DestInfo MachRegsAddr -- target
676 | CALL Imm Int Bool -- target, args, terminal
683 riZero (RIImm (ImmInt 0)) = True
684 riZero (RIImm (ImmInteger 0)) = True
685 riZero (RIReg (RealReg 0)) = True
688 -- Calculate the effective address which would be used by the
689 -- corresponding fpRel sequence. fpRel itself lives in MachRegs.lhs;
690 -- alas, the two cannot be defined in the same module because of module dependencies.
691 fpRelEA :: Int -> Reg -> Instr
693 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
695 -- Code to shift the stack pointer by n words.
696 moveSp :: Int -> Instr
698 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
700 -- Produce the second-half-of-a-double register given the first half.
-- Only a RealReg whose number is even and at least 32 is accepted; the
-- result is the next register, RealReg (n+1).  Any other argument panics
-- via pprPanic, printing the offending register.
702 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
703 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
704 #endif {- sparc_TARGET_ARCH -}