2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( callerSaves,
46 get_MagicId_addr, get_MagicId_reg_or_addr,
47 Imm(..), Reg(..), MachRegsAddr(..)
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixStmt(..), StixExpr(..), StixReg(..),
54 CodeSegment, DestInfo(..) )
55 import Panic ( panic )
57 import Outputable ( pprPanic, ppr, showSDoc )
58 import IOExts ( trace )
59 import Config ( cLeadingUnderscore )
62 import Maybe ( catMaybes )
-- True exactly when the configuration string cLeadingUnderscore (imported
-- from Config) equals "YES"; any other value gives False.
66 underscorePrefix :: Bool -- leading underscore on assembler labels?
67 underscorePrefix = (cLeadingUnderscore == "YES")
69 ---------------------------
70 fmtAsmLbl :: String -> String -- for formatting labels
74 {- The alpha assembler likes temporary labels to look like $L123
75 instead of L123. (Don't toss the L, because then Lf28
84 % ----------------------------------------------------------------
86 We (allegedly) put the first six C-call arguments in registers;
87 where do we start putting the rest of them?
89 eXTRA_STK_ARGS_HERE :: Int
91 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96 Now the volatile saves and restores. We add the basic guys to the
97 list of ``user'' registers provided. Note that there are more basic
98 registers on the restore list, because some are reloaded from
101 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
-- Both functions map a list of MagicIds to a list of Stix statements.
-- Each is volatileSavesOrRestores with its first (Bool) argument fixed:
-- True for volatileSaves, False for volatileRestores.
104 volatileSaves, volatileRestores :: [MagicId] -> [StixStmt]
106 volatileSaves = volatileSavesOrRestores True
107 volatileRestores = volatileSavesOrRestores False
-- Candidate MagicIds for saving and for restoring.  restore_cands is
-- defined as the very same list as save_cands.
-- NOTE(review): the accompanying prose says the restore list contains
-- more registers than the save list, which does not match this
-- definition -- confirm which is intended.
109 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
110 restore_cands = save_cands
112 volatileSavesOrRestores do_saves vols
113 = catMaybes (map mkCode vols)
116 | not (callerSaves mid)
118 | otherwise -- must be callee-saves ...
119 = case get_MagicId_reg_or_addr mid of
120 -- If stored in BaseReg, we ain't interested
124 -- OK, it's callee-saves, and in a real reg (rrno).
125 -- We have to cook up some transfer code.
126 {- Note that the use of (StixMagicId mid) here is a bit subtle.
127 Here, we only create those for MagicIds which are stored in
128 a real reg on this arch -- the preceding case on the result
129 of get_MagicId_reg_or_addr guarantees this. Later, when
130 selecting insns, that means these assignments are sure to turn
131 into real reg-to-mem or mem-to-reg moves, rather than being
132 pointless moves from some address in the reg-table
135 -> Just (StAssignMem rep addr
136 (StReg (StixMagicId mid)))
138 -> Just (StAssignReg rep (StixMagicId mid)
141 rep = magicIdPrimRep mid
142 addr = get_MagicId_addr mid
145 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
147 Obviously slightly weedy
148 (Note that the floating point values aren't terribly important.)
-- Extreme literal values assumed for the target.
-- The Double bounds are the largest-magnitude finite IEEE double-precision
-- values, so targetMinDouble is the most negative double, not the smallest
-- positive one.  The Int bounds are those of a signed 32-bit integer.
-- NOTE(review): the Int bounds are the same for every architecture here,
-- although IntRep is an 8-byte quadword on alpha (see primRepToSize) --
-- confirm this is intended.
151 targetMinDouble = MachDouble (-1.7976931348623157e+308)
152 targetMaxDouble = MachDouble (1.7976931348623157e+308)
153 targetMinInt = mkMachInt (-2147483648)
154 targetMaxInt = mkMachInt 2147483647
157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 This algorithm for determining the $\log_2$ of exact powers of 2 comes
160 from GCC. It requires bit manipulation primitives, and we use GHC
167 exactLog2 :: Integer -> Maybe Integer
169 = if (x <= 0 || x >= 2147483648) then
172 case iUnbox (fromInteger x) of { x# ->
173 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
176 Just (toInteger (iBox (pow2 x#)))
179 pow2 x# | x# ==# 1# = 0#
180 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
183 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 #if alpha_TARGET_ARCH
188 = ALWAYS -- For BI (same as BR)
189 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
191 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
192 | LE -- For CMP and BI
193 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
195 | NEVER -- For BI (null instruction)
196 | ULE -- For CMP only
197 | ULT -- For CMP only
200 = ALWAYS -- What's really used? ToDo
216 #if sparc_TARGET_ARCH
217 = ALWAYS -- What's really used? ToDo
234 deriving Eq -- to make an assertion work
239 #if alpha_TARGET_ARCH
242 -- | W -- word (2 bytes): UNUSED
244 | L -- longword (4 bytes)
245 | Q -- quadword (8 bytes)
246 -- | FF -- VAX F-style floating pt: UNUSED
247 -- | GF -- VAX G-style floating pt: UNUSED
248 -- | DF -- VAX D-style floating pt: UNUSED
249 -- | SF -- IEEE single-precision floating pt: UNUSED
250 | TF -- IEEE double-precision floating pt
254 | Bu -- byte (unsigned)
256 | Wu -- word (unsigned)
257 | L -- longword (signed)
258 | Lu -- longword (unsigned)
259 | F -- IEEE single-precision floating pt
260 | DF -- IEEE double-precision floating pt
261 | F80 -- Intel 80-bit internal FP format; only used for spilling
263 #if sparc_TARGET_ARCH
265 | Bu -- byte (unsigned)
266 | H -- halfword (signed, 2 bytes)
267 | Hu -- halfword (unsigned, 2 bytes)
268 | W -- word (4 bytes)
269 | F -- IEEE single-precision floating pt
270 | DF -- IEEE double-precision floating pt
-- Map a PrimRep to the machine operand Size used for it.
--
-- Each equation lists one Size per target inside nested IF_ARCH_* macros,
-- in the order alpha, i386, sparc.  (The macros are presumably supplied by
-- nativeGen/NCG.h and select the alternative for the target being built --
-- confirm against that header.)
--
-- Failure cases, all routed through primRepToSize_fail:
--   * Int16Rep and Word16Rep on alpha (the `err' alternative);
--   * Word64Rep and Int64Rep on every target;
--   * any PrimRep not matched by an earlier equation (the final catch-all,
--     which prints the representation with showSDoc/ppr).
273 primRepToSize :: PrimRep -> Size
275 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
276 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
277 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
278 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
279 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
280 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
282 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
283 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(H, )))
284 where err = primRepToSize_fail "Int16Rep"
285 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
286 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
287 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(Hu, )))
288 where err = primRepToSize_fail "Word16Rep"
289 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
291 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
292 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
293 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
294 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
295 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
296 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
298 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
299 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
300 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
301 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
302 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
303 primRepToSize StableNameRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
304 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
306 primRepToSize Word64Rep = primRepToSize_fail "Word64Rep"
307 primRepToSize Int64Rep = primRepToSize_fail "Int64Rep"
308 primRepToSize other = primRepToSize_fail (showSDoc (ppr other))
-- Abort via `error' with a message naming the PrimRep that primRepToSize
-- could not handle.  `str' is the printable name of that PrimRep and is
-- spliced into the message between a backquote and a quote.  The message
-- also suggests -fvia-C as a workaround and gives the bug-report address.
-- Never returns normally.
310 primRepToSize_fail str
311 = error ("ERROR: MachMisc.primRepToSize: cannot handle `" ++ str ++ "'.\n\t"
312 ++ "Workaround: use -fvia-C.\n\t"
313 ++ "Perhaps you should report it as a GHC bug,\n\t"
314 ++ "to glasgow-haskell-bugs@haskell.org.")
318 %************************************************************************
320 \subsection{Machine's assembly language}
322 %************************************************************************
324 We have a few common ``instructions'' (nearly all the pseudo-ops) but
325 mostly all of @Instr@ is machine-specific.
329 = COMMENT FAST_STRING -- comment pseudo-op
330 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
331 | LABEL CLabel -- global label pseudo-op
332 | ASCII Bool -- True <=> needs backslash conversion
333 String -- the literal string
336 | DELTA Int -- specify current stack offset for
337 -- benefit of subsequent passes
341 #if alpha_TARGET_ARCH
343 -- data Instr continues...
347 | LD Size Reg MachRegsAddr -- size, dst, src
348 | LDA Reg MachRegsAddr -- dst, src
349 | LDAH Reg MachRegsAddr -- dst, src
350 | LDGP Reg MachRegsAddr -- dst, src
351 | LDI Size Reg Imm -- size, dst, src
352 | ST Size Reg MachRegsAddr -- size, src, dst
357 | ABS Size RI Reg -- size, src, dst
358 | NEG Size Bool RI Reg -- size, overflow, src, dst
359 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
360 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
361 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
362 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
363 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
364 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
365 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
367 -- Simple bit-twiddling.
387 | CMP Cond Reg RI Reg
394 | FADD Size Reg Reg Reg
395 | FDIV Size Reg Reg Reg
396 | FMUL Size Reg Reg Reg
397 | FSUB Size Reg Reg Reg
398 | CVTxy Size Size Reg Reg
399 | FCMP Size Cond Reg Reg Reg
407 | JMP Reg MachRegsAddr Int
409 | JSR Reg MachRegsAddr Int
411 -- Alpha-specific pseudo-ops.
420 #endif {- alpha_TARGET_ARCH -}
423 Intel, in their infinite wisdom, selected a stack model for floating
424 point registers on x86. That might have made sense back in 1979 --
425 nowadays we can see it for the nonsense it really is. A stack model
426 fits poorly with the existing nativeGen infrastructure, which assumes
427 flat integer and FP register sets. Prior to this commit, nativeGen
428 could not generate correct x86 FP code -- to do so would have meant
429 somehow working the register-stack paradigm into the register
430 allocator and spiller, which sounds very difficult.
432 We have decided to cheat, and go for a simple fix which requires no
433 infrastructure modifications, at the expense of generating ropey but
434 correct FP code. All notions of the x86 FP stack and its insns have
435 been removed. Instead, we pretend (to the instruction selector and
436 register allocator) that x86 has six floating point registers, %fake0
437 .. %fake5, which can be used in the usual flat manner. We further
438 claim that x86 has floating point instructions very similar to SPARC
439 and Alpha, that is, a simple 3-operand register-register arrangement.
440 Code generation and register allocation proceed on this basis.
442 When we come to print out the final assembly, our convenient fiction
443 is converted to dismal reality. Each fake instruction is
444 independently converted to a series of real x86 instructions.
445 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
446 arithmetic operations, the two operands are pushed onto the top of the
447 FP stack, the operation done, and the result copied back into the
448 relevant register. There are only six %fake registers because 2 are
449 needed for the translation, and x86 has 8 in total.
451 The translation is inefficient but is simple and it works. A cleverer
452 translation would handle a sequence of insns, simulating the FP stack
453 contents, would not impose a fixed mapping from %fake to %st regs, and
454 hopefully could avoid most of the redundant reg-reg moves of the
457 We might as well make use of whatever unique FP facilities Intel have
458 chosen to bless us with (let's not be churlish, after all).
459 Hence GLDZ and GLD1. Bwahahahahahahaha!
461 LATER (10 Nov 2000): idiv gives problems with the register spiller,
462 because the spiller is simpleminded and because idiv has fixed uses of
463 %eax and %edx. Rather than make the spiller cleverer, we do away with
464 idiv, and instead have iquot and irem fake (integer) insns, which have
465 no operand register constraints -- ie, they behave like add, sub, mul.
466 The printer-outer transforms them to a sequence of real insns which does
467 the Right Thing (tm). As with the FP stuff, this gives ropey code,
468 but we don't care, since it doesn't get used much. We hope.
473 -- data Instr continues...
477 | MOV Size Operand Operand
478 | MOVZxL Size Operand Operand -- size is the size of operand 1
479 | MOVSxL Size Operand Operand -- size is the size of operand 1
481 -- Load effective address (also a very useful three-operand add instruction :-)
483 | LEA Size Operand Operand
487 | ADD Size Operand Operand
488 | SUB Size Operand Operand
489 | IMUL Size Operand Operand -- signed int mul
490 | MUL Size Operand Operand -- unsigned int mul
491 | IMUL64 Reg Reg -- 32 x 32 -> 64 signed mul
492 -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
494 -- Quotient and remainder. SEE comment above -- these are not
495 -- real x86 insns; instead they are expanded when printed
496 -- into a sequence of real insns.
498 | IQUOT Size Operand Operand -- signed quotient
499 | IREM Size Operand Operand -- signed remainder
500 | QUOT Size Operand Operand -- unsigned quotient
501 | REM Size Operand Operand -- unsigned remainder
503 -- Simple bit-twiddling.
505 | AND Size Operand Operand
506 | OR Size Operand Operand
507 | XOR Size Operand Operand
509 | NEGI Size Operand -- NEG instruction (name clash with Cond)
510 | SHL Size Imm Operand -- Only immediate shifts allowed
511 | SAR Size Imm Operand -- Only immediate shifts allowed
512 | SHR Size Imm Operand -- Only immediate shifts allowed
513 | BT Size Imm Operand
518 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
519 -- as single instructions right up until we spit them out.
521 -- all the 3-operand fake fp insns are src1 src2 dst
522 -- and furthermore are constrained to be fp regs only.
523 -- IMPORTANT: keep is_G_insn up to date with any changes here
524 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
525 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
526 | GST Size Reg MachRegsAddr -- src(fpreg), dst
528 | GLDZ Reg -- dst(fpreg)
529 | GLD1 Reg -- dst(fpreg)
531 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
532 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
534 | GITOF Reg Reg -- src(intreg), dst(fpreg)
535 | GITOD Reg Reg -- src(intreg), dst(fpreg)
537 | GADD Size Reg Reg Reg -- src1, src2, dst
538 | GDIV Size Reg Reg Reg -- src1, src2, dst
539 | GSUB Size Reg Reg Reg -- src1, src2, dst
540 | GMUL Size Reg Reg Reg -- src1, src2, dst
542 -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
543 -- Compare src1 with src2; set the Zero flag iff the numbers are
544 -- comparable and the comparison is True. Subsequent code must
545 -- test the %eflags zero flag regardless of the supplied Cond.
546 | GCMP Cond Reg Reg -- src1, src2
548 | GABS Size Reg Reg -- src, dst
549 | GNEG Size Reg Reg -- src, dst
550 | GSQRT Size Reg Reg -- src, dst
551 | GSIN Size Reg Reg -- src, dst
552 | GCOS Size Reg Reg -- src, dst
553 | GTAN Size Reg Reg -- src, dst
555 | GFREE -- do ffree on all x86 regs; an ugly hack
558 | TEST Size Operand Operand
559 | CMP Size Operand Operand
571 | JMP DestInfo Operand -- possible dests, target
572 | JXX Cond CLabel -- target
577 | CLTD -- sign extend %eax into %edx:%eax
580 = OpReg Reg -- register
581 | OpImm Imm -- immediate value
582 | OpAddr MachRegsAddr -- memory reference
585 i386_insert_ffrees :: [Instr] -> [Instr]
586 i386_insert_ffrees insns
587 | any is_G_instr insns
588 = concatMap ffree_before_nonlocal_transfers insns
592 ffree_before_nonlocal_transfers insn
594 CALL _ -> [GFREE, insn]
595 -- Jumps to immediate labels are local
596 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
597 -- If a jump mentions dests, it is a local jump thru
599 JMP (DestInfo _) _ -> [insn]
600 JMP _ _ -> [GFREE, insn]
604 -- if you ever add a new FP insn to the fake x86 FP insn set,
605 -- you must update this too
606 is_G_instr :: Instr -> Bool
609 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
610 GLDZ _ -> True; GLD1 _ -> True;
611 GFTOI _ _ -> True; GDTOI _ _ -> True;
612 GITOF _ _ -> True; GITOD _ _ -> True;
613 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
614 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
615 GCMP _ _ _ -> True; GABS _ _ _ -> True
616 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
617 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
618 GFREE -> panic "is_G_instr: GFREE (!)"
621 #endif {- i386_TARGET_ARCH -}
625 #if sparc_TARGET_ARCH
627 -- data Instr continues...
631 | LD Size MachRegsAddr Reg -- size, src, dst
632 | ST Size Reg MachRegsAddr -- size, src, dst
636 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
637 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
638 | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
639 | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
640 | RDY Reg -- move contents of Y register to reg
642 -- Simple bit-twiddling.
644 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
645 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
646 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
647 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
648 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
649 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
650 | SLL Reg RI Reg -- src1, src2, dst
651 | SRL Reg RI Reg -- src1, src2, dst
652 | SRA Reg RI Reg -- src1, src2, dst
653 | SETHI Imm Reg -- src, dst
654 | NOP -- Really SETHI 0, %g0, but worth an alias
658 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
659 -- right up until we spit them out.
661 | FABS Size Reg Reg -- src dst
662 | FADD Size Reg Reg Reg -- src1, src2, dst
663 | FCMP Bool Size Reg Reg -- exception?, src1, src2
664 | FDIV Size Reg Reg Reg -- src1, src2, dst
665 | FMOV Size Reg Reg -- src, dst
666 | FMUL Size Reg Reg Reg -- src1, src2, dst
667 | FNEG Size Reg Reg -- src, dst
668 | FSQRT Size Reg Reg -- src, dst
669 | FSUB Size Reg Reg Reg -- src1, src2, dst
670 | FxTOy Size Size Reg Reg -- src, dst
674 | BI Cond Bool Imm -- cond, annul?, target
675 | BF Cond Bool Imm -- cond, annul?, target
677 | JMP DestInfo MachRegsAddr -- target
678 | CALL Imm Int Bool -- target, args, terminal
685 riZero (RIImm (ImmInt 0)) = True
686 riZero (RIImm (ImmInteger 0)) = True
687 riZero (RIReg (RealReg 0)) = True
690 -- Calculate the effective address which would be used by the
691 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
692 -- alas -- can't have fpRelEA there because of module dependencies.
693 fpRelEA :: Int -> Reg -> Instr
695 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
697 -- Code to shift the stack pointer by n words.
698 moveSp :: Int -> Instr
700 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
702 -- Produce the second-half-of-a-double register given the first half.
704 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
705 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
706 #endif {- sparc_TARGET_ARCH -}