2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 -- #include "config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment, DestInfo(..) )
54 import Panic ( panic )
55 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
56 import Outputable ( pprPanic, ppr )
57 import IOExts ( trace )
62 underscorePrefix :: Bool -- leading underscore on assembler labels?
64 #ifdef LEADING_UNDERSCORE
65 underscorePrefix = True
67 underscorePrefix = False
70 ---------------------------
71 fmtAsmLbl :: String -> String -- for formatting labels
75 {- The alpha assembler likes temporary labels to look like $L123
76 instead of L123. (Don't toss the L, because then Lf28
85 % ----------------------------------------------------------------
87 We (allegedly) put the first six C-call arguments in registers;
88 where do we start putting the rest of them?
90 eXTRA_STK_ARGS_HERE :: Int
92 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
95 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
97 Size of a @PrimRep@, in bytes.
-- Size of a PrimRep in bytes. The representation is first mapped to its
-- machine Size with primRepToSize; the byte count is then read from the
-- case table for the target architecture (presumably exactly one of the
-- IF_ARCH_* alternatives survives preprocessing -- TODO confirm against
-- nativeGen/NCG.h).
-- On alpha every Size other than B, BU and L falls through to 8.
-- NOTE(review): the i386 table has no alternative for F80; none of the
-- primRepToSize equations yields F80, so this is presumably unreachable
-- -- confirm.
100 sizeOf :: PrimRep -> Integer{-in bytes-}
101 -- the result is an Integer only because it's more convenient
103 sizeOf pr = case (primRepToSize pr) of
104 IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2;-} L -> 4; {-SF -> 4;-} _ -> 8},)
105 IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
106 IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
109 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
111 Now the volatile saves and restores. We add the basic guys to the
112 list of ``user'' registers provided. Note that there are more basic
113 registers on the restore list, because some are reloaded from
116 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
119 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
121 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
122 restore_cands = save_cands
125 = map save ((filter callerSaves) (save_cands ++ vols))
127 save x = StAssign (magicIdPrimRep x) loc reg
129 reg = StReg (StixMagicId x)
130 loc = case stgReg x of
132 Always _ -> panic "volatileSaves"
134 volatileRestores vols
135 = map restore ((filter callerSaves) (restore_cands ++ vols))
137 restore x = StAssign (magicIdPrimRep x) reg loc
139 reg = StReg (StixMagicId x)
140 loc = case stgReg x of
142 Always _ -> panic "volatileRestores"
145 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
147 Obviously slightly weedy
148 (Note that the floating point values aren't terribly important.)
-- Extreme literal values for the target, built with the MachDouble
-- constructor and mkMachInt (both imported from Literal).
-- The Int bounds are -2^31 and 2^31-1, i.e. those of a 32-bit signed word.
-- NOTE(review): primRepToSize maps IntRep to Q (quadword) on alpha, so
-- these Int bounds look too narrow for that target -- confirm.
151 targetMinDouble = MachDouble (-1.7976931348623157e+308)
152 targetMaxDouble = MachDouble (1.7976931348623157e+308)
153 targetMinInt = mkMachInt (-2147483648)
154 targetMaxInt = mkMachInt 2147483647
157 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 This algorithm for determining the $\log_2$ of exact powers of 2 comes
160 from GCC. It requires bit manipulation primitives, and we use GHC
167 exactLog2 :: Integer -> Maybe Integer
169 = if (x <= 0 || x >= 2147483648) then
172 case iUnbox (fromInteger x) of { x# ->
173 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
176 Just (toInteger (iBox (pow2 x#)))
179 shiftr x y = shiftRL# x y
181 pow2 x# | x# ==# 1# = 0#
182 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
185 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189 #if alpha_TARGET_ARCH
190 = ALWAYS -- For BI (same as BR)
191 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
193 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
194 | LE -- For CMP and BI
195 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
197 | NEVER -- For BI (null instruction)
198 | ULE -- For CMP only
199 | ULT -- For CMP only
202 = ALWAYS -- What's really used? ToDo
216 #if sparc_TARGET_ARCH
217 = ALWAYS -- What's really used? ToDo
238 #if alpha_TARGET_ARCH
241 -- | W -- word (2 bytes): UNUSED
243 | L -- longword (4 bytes)
244 | Q -- quadword (8 bytes)
245 -- | FF -- VAX F-style floating pt: UNUSED
246 -- | GF -- VAX G-style floating pt: UNUSED
247 -- | DF -- VAX D-style floating pt: UNUSED
248 -- | SF -- IEEE single-precision floating pt: UNUSED
249 | TF -- IEEE double-precision floating pt
253 -- | HB -- higher byte **UNUSED**
256 | F -- IEEE single-precision floating pt
257 | DF -- IEEE double-precision floating pt
258 | F80 -- Intel 80-bit internal FP format; only used for spilling
260 #if sparc_TARGET_ARCH
262 | BU -- byte (unsigned)
263 -- | HW -- halfword, 2 bytes (signed): UNUSED
264 -- | HWU -- halfword, 2 bytes (unsigned): UNUSED
266 -- | D -- doubleword, 8 bytes: UNUSED
267 | F -- IEEE single-precision floating pt
268 | DF -- IEEE double-precision floating pt
-- Map each PrimRep to the machine Size used for it on the target
-- architecture. Every equation lists the alpha, i386 and sparc choice,
-- in that order.
--   * Pointer-like and word-like reps: Q on alpha, L on i386, W on sparc.
--   * CharRep: L on alpha (not Q), L on i386, W on sparc.
--   * Int8Rep: B on all three.
--   * FloatRep: TF / F / F; DoubleRep: TF / DF / DF (alpha uses TF for both).
271 primRepToSize :: PrimRep -> Size
273 primRepToSize PtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
274 primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
275 primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
276 primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
277 primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
278 primRepToSize CharRep = IF_ARCH_alpha( L, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
279 primRepToSize Int8Rep = IF_ARCH_alpha( B, IF_ARCH_i386( B, IF_ARCH_sparc( B ,)))
280 primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
281 primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
282 primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
283 primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( F ,)))
284 primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,)))
285 primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
286 primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
287 primRepToSize PrimPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
288 primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
289 primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
290 primRepToSize BCORep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
291 primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
292 primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
293 -- SUP: Wrong!!! Only for testing the rest of the NCG
-- Both 64-bit reps are mapped to the byte size B as a placeholder, on
-- every architecture, and report the fact through trace.
294 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
295 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
298 %************************************************************************
300 \subsection{Machine's assembly language}
302 %************************************************************************
304 We have a few common ``instructions'' (nearly all the pseudo-ops) but
305 mostly all of @Instr@ is machine-specific.
309 = COMMENT FAST_STRING -- comment pseudo-op
310 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
311 | LABEL CLabel -- global label pseudo-op
312 | ASCII Bool -- True <=> needs backslash conversion
313 String -- the literal string
316 | DELTA Int -- specify current stack offset for
317 -- benefit of subsequent passes
321 #if alpha_TARGET_ARCH
323 -- data Instr continues...
327 | LD Size Reg MachRegsAddr -- size, dst, src
328 | LDA Reg MachRegsAddr -- dst, src
329 | LDAH Reg MachRegsAddr -- dst, src
330 | LDGP Reg MachRegsAddr -- dst, src
331 | LDI Size Reg Imm -- size, dst, src
332 | ST Size Reg MachRegsAddr -- size, src, dst
337 | ABS Size RI Reg -- size, src, dst
338 | NEG Size Bool RI Reg -- size, overflow, src, dst
339 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
340 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
341 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
342 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
343 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
344 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
345 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
347 -- Simple bit-twiddling.
367 | CMP Cond Reg RI Reg
374 | FADD Size Reg Reg Reg
375 | FDIV Size Reg Reg Reg
376 | FMUL Size Reg Reg Reg
377 | FSUB Size Reg Reg Reg
378 | CVTxy Size Size Reg Reg
379 | FCMP Size Cond Reg Reg Reg
387 | JMP Reg MachRegsAddr Int
389 | JSR Reg MachRegsAddr Int
391 -- Alpha-specific pseudo-ops.
400 #endif {- alpha_TARGET_ARCH -}
403 Intel, in their infinite wisdom, selected a stack model for floating
404 point registers on x86. That might have made sense back in 1979 --
405 nowadays we can see it for the nonsense it really is. A stack model
406 fits poorly with the existing nativeGen infrastructure, which assumes
407 flat integer and FP register sets. Prior to this commit, nativeGen
408 could not generate correct x86 FP code -- to do so would have meant
409 somehow working the register-stack paradigm into the register
410 allocator and spiller, which sounds very difficult.
412 We have decided to cheat, and go for a simple fix which requires no
413 infrastructure modifications, at the expense of generating ropey but
414 correct FP code. All notions of the x86 FP stack and its insns have
415 been removed. Instead, we pretend (to the instruction selector and
416 register allocator) that x86 has six floating point registers, %fake0
417 .. %fake5, which can be used in the usual flat manner. We further
418 claim that x86 has floating point instructions very similar to SPARC
419 and Alpha, that is, a simple 3-operand register-register arrangement.
420 Code generation and register allocation proceed on this basis.
422 When we come to print out the final assembly, our convenient fiction
423 is converted to dismal reality. Each fake instruction is
424 independently converted to a series of real x86 instructions.
425 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
426 arithmetic operations, the two operands are pushed onto the top of the
427 FP stack, the operation done, and the result copied back into the
428 relevant register. There are only six %fake registers because 2 are
429 needed for the translation, and x86 has 8 in total.
431 The translation is inefficient but is simple and it works. A cleverer
432 translation would handle a sequence of insns, simulating the FP stack
433 contents, would not impose a fixed mapping from %fake to %st regs, and
434 hopefully could avoid most of the redundant reg-reg moves of the
437 We might as well make use of whatever unique FP facilities Intel have
438 chosen to bless us with (let's not be churlish, after all).
439 Hence GLDZ and GLD1. Bwahahahahahahaha!
441 LATER (10 Nov 2000): idiv gives problems with the register spiller,
442 because the spiller is simpleminded and because idiv has fixed uses of
443 %eax and %edx. Rather than make the spiller cleverer, we do away with
444 idiv, and instead have iquot and irem fake (integer) insns, which have
445 no operand register constraints -- ie, they behave like add, sub, mul.
446 The printer-outer transforms them to a sequence of real insns which does
447 the Right Thing (tm). As with the FP stuff, this gives ropey code,
448 but we don't care, since it doesn't get used much. We hope.
453 -- data Instr continues...
457 | MOV Size Operand Operand
458 | MOVZxL Size Operand Operand -- size is the size of operand 1
459 | MOVSxL Size Operand Operand -- size is the size of operand 1
461 -- Load effective address (also a very useful three-operand add instruction :-)
463 | LEA Size Operand Operand
467 | ADD Size Operand Operand
468 | SUB Size Operand Operand
469 | IMUL Size Operand Operand
471 -- Quotient and remainder. SEE comment above -- these are not
472 -- real x86 insns; instead they are expanded when printed
473 -- into a sequence of real insns.
475 | IQUOT Size Operand Operand
476 | IREM Size Operand Operand
478 -- Simple bit-twiddling.
480 | AND Size Operand Operand
481 | OR Size Operand Operand
482 | XOR Size Operand Operand
484 | NEGI Size Operand -- NEG instruction (name clash with Cond)
485 | SHL Size Imm Operand -- Only immediate shifts allowed
486 | SAR Size Imm Operand -- Only immediate shifts allowed
487 | SHR Size Imm Operand -- Only immediate shifts allowed
488 | BT Size Imm Operand
493 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
494 -- as single instructions right up until we spit them out.
496 -- all the 3-operand fake fp insns are src1 src2 dst
497 -- and furthermore are constrained to be fp regs only.
498 -- IMPORTANT: keep is_G_instr up to date with any changes here
499 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
500 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
501 | GST Size Reg MachRegsAddr -- src(fpreg), dst
503 | GLDZ Reg -- dst(fpreg)
504 | GLD1 Reg -- dst(fpreg)
506 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
507 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
509 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
510 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
512 | GITOF Reg Reg -- src(intreg), dst(fpreg)
513 | GITOD Reg Reg -- src(intreg), dst(fpreg)
515 | GADD Size Reg Reg Reg -- src1, src2, dst
516 | GDIV Size Reg Reg Reg -- src1, src2, dst
517 | GSUB Size Reg Reg Reg -- src1, src2, dst
518 | GMUL Size Reg Reg Reg -- src1, src2, dst
520 | GCMP Size Reg Reg -- src1, src2
522 | GABS Size Reg Reg -- src, dst
523 | GNEG Size Reg Reg -- src, dst
524 | GSQRT Size Reg Reg -- src, dst
525 | GSIN Size Reg Reg -- src, dst
526 | GCOS Size Reg Reg -- src, dst
527 | GTAN Size Reg Reg -- src, dst
529 | GFREE -- do ffree on all x86 regs; an ugly hack
532 | TEST Size Operand Operand
533 | CMP Size Operand Operand
545 | JMP DestInfo Operand -- possible dests, target
546 | JXX Cond CLabel -- target
551 | CLTD -- sign extend %eax into %edx:%eax
554 = OpReg Reg -- register
555 | OpImm Imm -- immediate value
556 | OpAddr MachRegsAddr -- memory reference
559 i386_insert_ffrees :: [Instr] -> [Instr]
560 i386_insert_ffrees insns
561 | any is_G_instr insns
562 = concatMap ffree_before_nonlocal_transfers insns
566 ffree_before_nonlocal_transfers insn
568 CALL _ -> [GFREE, insn]
569 -- Jumps to immediate labels are local
570 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
571 -- If a jump mentions dests, it is a local jump thru
573 JMP (DestInfo _) _ -> [insn]
574 JMP _ _ -> [GFREE, insn]
578 -- if you ever add a new FP insn to the fake x86 FP insn set,
579 -- you must update this too
580 is_G_instr :: Instr -> Bool
583 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
584 GLDZ _ -> True; GLD1 _ -> True;
585 GFTOD _ _ -> True; GFTOI _ _ -> True;
586 GDTOF _ _ -> True; GDTOI _ _ -> True;
587 GITOF _ _ -> True; GITOD _ _ -> True;
588 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
589 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
590 GCMP _ _ _ -> True; GABS _ _ _ -> True
591 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
592 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
593 GFREE -> panic "is_G_instr: GFREE (!)"
596 #endif {- i386_TARGET_ARCH -}
600 #if sparc_TARGET_ARCH
602 -- data Instr continues...
606 | LD Size MachRegsAddr Reg -- size, src, dst
607 | ST Size Reg MachRegsAddr -- size, src, dst
611 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
612 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
614 -- Simple bit-twiddling.
616 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
617 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
618 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
619 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
620 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
621 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
622 | SLL Reg RI Reg -- src1, src2, dst
623 | SRL Reg RI Reg -- src1, src2, dst
624 | SRA Reg RI Reg -- src1, src2, dst
625 | SETHI Imm Reg -- src, dst
626 | NOP -- Really SETHI 0, %g0, but worth an alias
630 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
631 -- right up until we spit them out.
633 | FABS Size Reg Reg -- src dst
634 | FADD Size Reg Reg Reg -- src1, src2, dst
635 | FCMP Bool Size Reg Reg -- exception?, src1, src2
636 | FDIV Size Reg Reg Reg -- src1, src2, dst
637 | FMOV Size Reg Reg -- src, dst
638 | FMUL Size Reg Reg Reg -- src1, src2, dst
639 | FNEG Size Reg Reg -- src, dst
640 | FSQRT Size Reg Reg -- src, dst
641 | FSUB Size Reg Reg Reg -- src1, src2, dst
642 | FxTOy Size Size Reg Reg -- src, dst
646 | BI Cond Bool Imm -- cond, annul?, target
647 | BF Cond Bool Imm -- cond, annul?, target
649 | JMP DestInfo MachRegsAddr -- target
650 | CALL Imm Int Bool -- target, args, terminal
657 riZero (RIImm (ImmInt 0)) = True
658 riZero (RIImm (ImmInteger 0)) = True
659 riZero (RIReg (RealReg 0)) = True
662 -- Calculate the effective address which would be used by the
663 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
664 -- alas -- fpRelEA can't live alongside it because of module dependencies.
665 fpRelEA :: Int -> Reg -> Instr
667 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
669 -- Code to shift the stack pointer by n words.
670 moveSp :: Int -> Instr
672 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
674 -- Produce the second-half-of-a-double register given the first half.
-- Only a RealReg whose number is even and >= 32 is accepted (presumably
-- the start of the FP register numbering -- confirm against MachRegs);
-- the result is RealReg (n+1). Any other argument is a pprPanic that
-- prints the offending register.
676 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
677 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
678 #endif {- sparc_TARGET_ARCH -}