2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment, DestInfo(..) )
54 import Panic ( panic )
55 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
56 import Outputable ( pprPanic, ppr )
57 import IOExts ( trace )
58 import Config ( cLeadingUnderscore )
63 underscorePrefix :: Bool -- True exactly when cLeadingUnderscore is the string "YES"
64 underscorePrefix = cLeadingUnderscore == "YES"
66 ---------------------------
67 fmtAsmLbl :: String -> String -- for formatting labels
71 {- The alpha assembler likes temporary labels to look like $L123
72 instead of L123. (Don't toss the L, because then Lf28
81 % ----------------------------------------------------------------
83 We (allegedly) put the first six C-call arguments in registers;
84 where do we start putting the rest of them?
-- Per-target starting position for the C-call arguments that are not passed
-- in registers (see the question posed in the text above).
-- NOTE(review): the inline note says 6x4 bytes (= 24) but the i386 value is
-- 23 -- confirm the intended unit and value.
86 eXTRA_STK_ARGS_HERE :: Int
88 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
91 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
93 Size of a @PrimRep@, in bytes.
-- Size in bytes of a PrimRep: the PrimRep is first converted to a Size by
-- primRepToSize, and that Size is then looked up in the per-architecture
-- table below (presumably only the IF_ARCH_ line for the target being built
-- survives preprocessing -- the macros come from nativeGen/NCG.h).
96 sizeOf :: PrimRep -> Int{-in bytes-}
97 sizeOf pr = case primRepToSize pr of
98 IF_ARCH_alpha({B->1; Bu->1; {-W->2; Wu->2;-} L->4; {-SF->4;-} Q->8; TF->8},)
99 IF_ARCH_i386 ({B->1; Bu->1; W->2; Wu->2; L->4; Lu->4; F->4; DF->8; F80->10},)
100 IF_ARCH_sparc({B->1; Bu->1; W->4; F->4; DF->8},)
103 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
105 Now the volatile saves and restores. We add the basic guys to the
106 list of ``user'' registers provided. Note that there are more basic
107 registers on the restore list, because some are reloaded from
110 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
-- Both functions take a list of extra registers (vols) and produce one
-- StAssign per register that passes the callerSaves filter.
113 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
115 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim] -- basic registers always considered, in addition to vols
116 restore_cands = save_cands -- currently the same list; NOTE(review): the text above says the restore list has more registers -- confirm
119 = map save ((filter callerSaves) (save_cands ++ vols)) -- only candidates for which callerSaves holds are saved
121 save x = StAssign (magicIdPrimRep x) loc reg -- loc first, reg second (the reverse of restore)
123 reg = StReg (StixMagicId x)
124 loc = case stgReg x of
126 Always _ -> panic "volatileSaves" -- an Always result from stgReg is a fatal error here
128 volatileRestores vols
129 = map restore ((filter callerSaves) (restore_cands ++ vols)) -- only candidates for which callerSaves holds are restored
131 restore x = StAssign (magicIdPrimRep x) reg loc -- reg first, loc second (the reverse of save)
133 reg = StReg (StixMagicId x)
134 loc = case stgReg x of
136 Always _ -> panic "volatileRestores" -- an Always result from stgReg is a fatal error here
139 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141 Obviously slightly weedy
142 (Note that the floating point values aren't terribly important.)
145 targetMinDouble = MachDouble (-1.7976931348623157e+308) -- negation of targetMaxDouble
146 targetMaxDouble = MachDouble (1.7976931348623157e+308) -- largest finite IEEE double-precision value
147 targetMinInt = mkMachInt (-2147483648) -- -(2^31)
148 targetMaxInt = mkMachInt 2147483647 -- 2^31 - 1; NOTE(review): these are 32-bit bounds although IntRep is Q (8 bytes) on alpha -- confirm
151 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
153 This algorithm for determining the $\log_2$ of exact powers of 2 comes
154 from GCC. It requires bit manipulation primitives, and we use GHC
-- Exact base-2 logarithm: for an in-range exact power of two the result is
-- Just the exponent, computed by pow2.
161 exactLog2 :: Integer -> Maybe Integer
163 = if (x <= 0 || x >= 2147483648) then -- range test: x outside 1 .. 2^31-1
166 case iUnbox (fromInteger x) of { x# ->
167 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then -- x AND (0 - x) keeps only the lowest set bit; it equals x only for an exact power of 2
170 Just (toInteger (iBox (pow2 x#)))
173 shiftr x y = shiftRL# x y
175 pow2 x# | x# ==# 1# = 0#
176 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#)) -- counts the logical right shifts needed to reach 1
179 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
183 #if alpha_TARGET_ARCH
184 = ALWAYS -- For BI (same as BR)
185 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
187 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
188 | LE -- For CMP and BI
189 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
191 | NEVER -- For BI (null instruction)
192 | ULE -- For CMP only
193 | ULT -- For CMP only
196 = ALWAYS -- What's really used? ToDo
210 #if sparc_TARGET_ARCH
211 = ALWAYS -- What's really used? ToDo
232 #if alpha_TARGET_ARCH
235 -- | W -- word (2 bytes): UNUSED
237 | L -- longword (4 bytes)
238 | Q -- quadword (8 bytes)
239 -- | FF -- VAX F-style floating pt: UNUSED
240 -- | GF -- VAX G-style floating pt: UNUSED
241 -- | DF -- VAX D-style floating pt: UNUSED
242 -- | SF -- IEEE single-precision floating pt: UNUSED
243 | TF -- IEEE double-precision floating pt
247 | Bu -- byte (unsigned)
249 | Wu -- word (unsigned)
250 | L -- longword (signed)
251 | Lu -- longword (unsigned)
252 | F -- IEEE single-precision floating pt
253 | DF -- IEEE double-precision floating pt
254 | F80 -- Intel 80-bit internal FP format; only used for spilling
256 #if sparc_TARGET_ARCH
258 | Bu -- byte (unsigned)
259 | W -- word (4 bytes)
260 | F -- IEEE single-precision floating pt
261 | DF -- IEEE double-precision floating pt
-- Map a PrimRep to the Size used for it on the target architecture.
-- Each equation lists the alpha, i386 and sparc choices in that order.
-- Most reps use one size per target (Q on alpha, L on i386, W on sparc).
-- Int16Rep and Word16Rep panic on alpha and sparc; FloatRep and DoubleRep
-- both map to TF on alpha.
264 primRepToSize :: PrimRep -> Size
266 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
267 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
268 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
269 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
270 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
271 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
273 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
274 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(err,)))
275 where err = panic "primRepToSize Int16Rep"
276 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
277 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
278 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
279 where err = panic "primRepToSize Word16Rep"
280 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
282 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
283 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
284 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
285 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
286 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
287 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
288 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
289 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
290 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
291 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
292 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
293 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
294 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
295 -- SUP: Wrong!!! Only for testing the rest of the NCG: the 64-bit reps are mapped to B (a single byte) after emitting a trace message
296 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
297 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
300 %************************************************************************
302 \subsection{Machine's assembly language}
304 %************************************************************************
306 We have a few common ``instructions'' (nearly all the pseudo-ops) but
307 mostly all of @Instr@ is machine-specific.
311 = COMMENT FAST_STRING -- comment pseudo-op
312 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
313 | LABEL CLabel -- global label pseudo-op
314 | ASCII Bool -- True <=> needs backslash conversion
315 String -- the literal string
318 | DELTA Int -- specify current stack offset for
319 -- benefit of subsequent passes
323 #if alpha_TARGET_ARCH
325 -- data Instr continues...
329 | LD Size Reg MachRegsAddr -- size, dst, src
330 | LDA Reg MachRegsAddr -- dst, src
331 | LDAH Reg MachRegsAddr -- dst, src
332 | LDGP Reg MachRegsAddr -- dst, src
333 | LDI Size Reg Imm -- size, dst, src
334 | ST Size Reg MachRegsAddr -- size, src, dst
339 | ABS Size RI Reg -- size, src, dst
340 | NEG Size Bool RI Reg -- size, overflow, src, dst
341 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
342 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
343 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
344 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
345 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
346 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
347 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
349 -- Simple bit-twiddling.
369 | CMP Cond Reg RI Reg
376 | FADD Size Reg Reg Reg
377 | FDIV Size Reg Reg Reg
378 | FMUL Size Reg Reg Reg
379 | FSUB Size Reg Reg Reg
380 | CVTxy Size Size Reg Reg
381 | FCMP Size Cond Reg Reg Reg
389 | JMP Reg MachRegsAddr Int
391 | JSR Reg MachRegsAddr Int
393 -- Alpha-specific pseudo-ops.
402 #endif {- alpha_TARGET_ARCH -}
405 Intel, in their infinite wisdom, selected a stack model for floating
406 point registers on x86. That might have made sense back in 1979 --
407 nowadays we can see it for the nonsense it really is. A stack model
408 fits poorly with the existing nativeGen infrastructure, which assumes
409 flat integer and FP register sets. Prior to this commit, nativeGen
410 could not generate correct x86 FP code -- to do so would have meant
411 somehow working the register-stack paradigm into the register
412 allocator and spiller, which sounds very difficult.
414 We have decided to cheat, and go for a simple fix which requires no
415 infrastructure modifications, at the expense of generating ropey but
416 correct FP code. All notions of the x86 FP stack and its insns have
417 been removed. Instead, we pretend (to the instruction selector and
418 register allocator) that x86 has six floating point registers, %fake0
419 .. %fake5, which can be used in the usual flat manner. We further
420 claim that x86 has floating point instructions very similar to SPARC
421 and Alpha, that is, a simple 3-operand register-register arrangement.
422 Code generation and register allocation proceed on this basis.
424 When we come to print out the final assembly, our convenient fiction
425 is converted to dismal reality. Each fake instruction is
426 independently converted to a series of real x86 instructions.
427 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
428 arithmetic operations, the two operands are pushed onto the top of the
429 FP stack, the operation done, and the result copied back into the
430 relevant register. There are only six %fake registers because 2 are
431 needed for the translation, and x86 has 8 in total.
433 The translation is inefficient but is simple and it works. A cleverer
434 translation would handle a sequence of insns, simulating the FP stack
435 contents, would not impose a fixed mapping from %fake to %st regs, and
436 hopefully could avoid most of the redundant reg-reg moves of the
439 We might as well make use of whatever unique FP facilities Intel have
440 chosen to bless us with (let's not be churlish, after all).
441 Hence GLDZ and GLD1. Bwahahahahahahaha!
443 LATER (10 Nov 2000): idiv gives problems with the register spiller,
444 because the spiller is simpleminded and because idiv has fixed uses of
445 %eax and %edx. Rather than make the spiller cleverer, we do away with
446 idiv, and instead have iquot and irem fake (integer) insns, which have
447 no operand register constraints -- ie, they behave like add, sub, mul.
448 The printer-outer transforms them to a sequence of real insns which does
449 the Right Thing (tm). As with the FP stuff, this gives ropey code,
450 but we don't care, since it doesn't get used much. We hope.
455 -- data Instr continues...
459 | MOV Size Operand Operand
460 | MOVZxL Size Operand Operand -- size is the size of operand 1
461 | MOVSxL Size Operand Operand -- size is the size of operand 1
463 -- Load effective address (also a very useful three-operand add instruction :-)
465 | LEA Size Operand Operand
469 | ADD Size Operand Operand
470 | SUB Size Operand Operand
471 | IMUL Size Operand Operand
473 -- Quotient and remainder. SEE comment above -- these are not
474 -- real x86 insns; instead they are expanded when printed
475 -- into a sequence of real insns.
477 | IQUOT Size Operand Operand
478 | IREM Size Operand Operand
480 -- Simple bit-twiddling.
482 | AND Size Operand Operand
483 | OR Size Operand Operand
484 | XOR Size Operand Operand
486 | NEGI Size Operand -- NEG instruction (name clash with Cond)
487 | SHL Size Imm Operand -- Only immediate shifts allowed
488 | SAR Size Imm Operand -- Only immediate shifts allowed
489 | SHR Size Imm Operand -- Only immediate shifts allowed
490 | BT Size Imm Operand
495 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
496 -- as single instructions right up until we spit them out.
498 -- all the 3-operand fake fp insns are src1 src2 dst
499 -- and furthermore are constrained to be fp regs only.
500 -- IMPORTANT: keep is_G_instr up to date with any changes here
501 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
502 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
503 | GST Size Reg MachRegsAddr -- src(fpreg), dst
505 | GLDZ Reg -- dst(fpreg)
506 | GLD1 Reg -- dst(fpreg)
508 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
509 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
511 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
512 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
514 | GITOF Reg Reg -- src(intreg), dst(fpreg)
515 | GITOD Reg Reg -- src(intreg), dst(fpreg)
517 | GADD Size Reg Reg Reg -- src1, src2, dst
518 | GDIV Size Reg Reg Reg -- src1, src2, dst
519 | GSUB Size Reg Reg Reg -- src1, src2, dst
520 | GMUL Size Reg Reg Reg -- src1, src2, dst
522 | GCMP Size Reg Reg -- src1, src2
524 | GABS Size Reg Reg -- src, dst
525 | GNEG Size Reg Reg -- src, dst
526 | GSQRT Size Reg Reg -- src, dst
527 | GSIN Size Reg Reg -- src, dst
528 | GCOS Size Reg Reg -- src, dst
529 | GTAN Size Reg Reg -- src, dst
531 | GFREE -- do ffree on all x86 regs; an ugly hack
534 | TEST Size Operand Operand
535 | CMP Size Operand Operand
547 | JMP DestInfo Operand -- possible dests, target
548 | JXX Cond CLabel -- target
553 | CLTD -- sign extend %eax into %edx:%eax
556 = OpReg Reg -- register
557 | OpImm Imm -- immediate value
558 | OpAddr MachRegsAddr -- memory reference
-- When the list contains at least one fake FP instruction (is_G_instr),
-- every instruction is passed through ffree_before_nonlocal_transfers,
-- which puts a GFREE in front of calls and non-local jumps.
561 i386_insert_ffrees :: [Instr] -> [Instr]
562 i386_insert_ffrees insns
563 | any is_G_instr insns
564 = concatMap ffree_before_nonlocal_transfers insns
568 ffree_before_nonlocal_transfers insn
570 CALL _ -> [GFREE, insn] -- every call gets a preceding GFREE
571 -- Jumps to immediate labels that are assembler temporaries (isAsmTemp) are local
572 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
573 -- If a jump mentions dests, it is a local jump thru
575 JMP (DestInfo _) _ -> [insn]
576 JMP _ _ -> [GFREE, insn] -- any other jump is treated as a non-local transfer
580 -- if you ever add a new FP insn to the fake x86 FP insn set,
581 -- you must update this too
-- True for each of the fake FP instructions (the G* constructors) listed below.
582 is_G_instr :: Instr -> Bool
585 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
586 GLDZ _ -> True; GLD1 _ -> True;
587 GFTOD _ _ -> True; GFTOI _ _ -> True;
588 GDTOF _ _ -> True; GDTOI _ _ -> True;
589 GITOF _ _ -> True; GITOD _ _ -> True;
590 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
591 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
592 GCMP _ _ _ -> True; GABS _ _ _ -> True
593 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
594 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
595 GFREE -> panic "is_G_instr: GFREE (!)" -- meeting a GFREE here is a fatal error
598 #endif {- i386_TARGET_ARCH -}
602 #if sparc_TARGET_ARCH
604 -- data Instr continues...
608 | LD Size MachRegsAddr Reg -- size, src, dst
609 | ST Size Reg MachRegsAddr -- size, src, dst
613 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
614 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
616 -- Simple bit-twiddling.
618 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
619 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
620 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
621 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
622 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
623 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
624 | SLL Reg RI Reg -- src1, src2, dst
625 | SRL Reg RI Reg -- src1, src2, dst
626 | SRA Reg RI Reg -- src1, src2, dst
627 | SETHI Imm Reg -- src, dst
628 | NOP -- Really SETHI 0, %g0, but worth an alias
632 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
633 -- right up until we spit them out.
635 | FABS Size Reg Reg -- src dst
636 | FADD Size Reg Reg Reg -- src1, src2, dst
637 | FCMP Bool Size Reg Reg -- exception?, size, src1, src2
638 | FDIV Size Reg Reg Reg -- src1, src2, dst
639 | FMOV Size Reg Reg -- src, dst
640 | FMUL Size Reg Reg Reg -- src1, src2, dst
641 | FNEG Size Reg Reg -- src, dst
642 | FSQRT Size Reg Reg -- src, dst
643 | FSUB Size Reg Reg Reg -- src1, src2, dst
644 | FxTOy Size Size Reg Reg -- src, dst
648 | BI Cond Bool Imm -- cond, annul?, target
649 | BF Cond Bool Imm -- cond, annul?, target
651 | JMP DestInfo MachRegsAddr -- target
652 | CALL Imm Int Bool -- target, args, terminal
659 riZero (RIImm (ImmInt 0)) = True -- immediate Int zero
660 riZero (RIImm (ImmInteger 0)) = True -- immediate Integer zero
661 riZero (RIReg (RealReg 0)) = True -- real register number 0
664 -- Calculate the effective address which would be used by the
665 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
666 -- alas -- can't have fpRelEA there because of module dependencies
-- (it builds an Instr, and this module imports MachRegs).
-- The result is a single ADD of n * BYTES_PER_WORD to fp, with dst as destination.
667 fpRelEA :: Int -> Reg -> Instr
669 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
671 -- Code to shift the stack pointer by n words (n * BYTES_PER_WORD bytes).
672 moveSp :: Int -> Instr
674 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp -- a single ADD with sp as both first source and destination
676 -- Given the first register of a double, yield the register holding its second half.
-- Only an even-numbered RealReg numbered 32 or above is accepted; anything else panics.
678 fPair (RealReg r) | r >= 32 && even r = RealReg (r + 1)
679 fPair bad = pprPanic "fPair(sparc NCG)" (ppr bad)
680 #endif {- sparc_TARGET_ARCH -}