2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 #include "../includes/config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment, DestInfo(..) )
54 import Panic ( panic )
55 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
56 import Outputable ( pprPanic, ppr )
57 import IOExts ( trace )
62 underscorePrefix :: Bool -- leading underscore on assembler labels?
64 #ifdef LEADING_UNDERSCORE
65 underscorePrefix = True
67 underscorePrefix = False
70 ---------------------------
71 fmtAsmLbl :: String -> String -- for formatting labels
75 {- The alpha assembler likes temporary labels to look like $L123
76 instead of L123. (Don't toss the L, because then Lf28
85 % ----------------------------------------------------------------
87 We (allegedly) put the first six C-call arguments in registers;
88 where do we start putting the rest of them?
90 eXTRA_STK_ARGS_HERE :: Int
92 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
95 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
97 Size of a @PrimRep@, in bytes.
100 sizeOf :: PrimRep -> Int{-in bytes-}
101 sizeOf pr = case primRepToSize pr of
102 IF_ARCH_alpha({B->1; Bu->1; {-W->2; Wu->2;-} L->4; {-SF->4;-} Q->8; TF->8},)
103 IF_ARCH_i386 ({B->1; Bu->1; W->2; Wu->2; L->4; Lu->4; F->4; DF->8; F80->10},)
104 IF_ARCH_sparc({B->1; Bu->1; W->4; F->4; DF->8},)
107 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
109 Now the volatile saves and restores. We add the basic guys to the
110 list of ``user'' registers provided. Note that there are more basic
111 registers on the restore list, because some are reloaded from
114 (@volatileRestores@ is used only for wrapper-hungry PrimOps.)
117 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
119 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim] -- candidate registers for saving; filtered by callerSaves together with the caller-supplied list
120 restore_cands = save_cands -- NOTE(review): identical to save_cands, although the prose above says the restore list has more registers -- confirm
123 = map save ((filter callerSaves) (save_cands ++ vols))
125 save x = StAssign (magicIdPrimRep x) loc reg
127 reg = StReg (StixMagicId x)
128 loc = case stgReg x of
130 Always _ -> panic "volatileSaves"
132 volatileRestores vols
133 = map restore ((filter callerSaves) (restore_cands ++ vols))
135 restore x = StAssign (magicIdPrimRep x) reg loc
137 reg = StReg (StixMagicId x)
138 loc = case stgReg x of
140 Always _ -> panic "volatileRestores"
143 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
145 Obviously slightly weedy
146 (Note that the floating point values aren't terribly important.)
149 targetMinDouble = MachDouble (-1.7976931348623157e+308) -- negation of targetMaxDouble (most negative value, not the smallest positive one)
150 targetMaxDouble = MachDouble (1.7976931348623157e+308) -- largest finite IEEE double-precision value
151 targetMinInt = mkMachInt (-2147483648) -- -(2^31): 32-bit signed minimum
152 targetMaxInt = mkMachInt 2147483647 -- 2^31 - 1: 32-bit signed maximum
-- NOTE(review): the Int bounds are 32-bit on every target, although primRepToSize maps IntRep to Q (8 bytes) on alpha -- confirm whether alpha needs wider bounds
155 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
157 This algorithm for determining the $\log_2$ of exact powers of 2 comes
158 from GCC. It requires bit manipulation primitives, and we use GHC
165 exactLog2 :: Integer -> Maybe Integer
167 = if (x <= 0 || x >= 2147483648) then
170 case iUnbox (fromInteger x) of { x# ->
171 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
174 Just (toInteger (iBox (pow2 x#)))
177 shiftr x y = shiftRL# x y
179 pow2 x# | x# ==# 1# = 0#
180 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
183 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 #if alpha_TARGET_ARCH
188 = ALWAYS -- For BI (same as BR)
189 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
191 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
192 | LE -- For CMP and BI
193 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
195 | NEVER -- For BI (null instruction)
196 | ULE -- For CMP only
197 | ULT -- For CMP only
200 = ALWAYS -- What's really used? ToDo
214 #if sparc_TARGET_ARCH
215 = ALWAYS -- What's really used? ToDo
236 #if alpha_TARGET_ARCH
239 -- | W -- word (2 bytes): UNUSED
241 | L -- longword (4 bytes)
242 | Q -- quadword (8 bytes)
243 -- | FF -- VAX F-style floating pt: UNUSED
244 -- | GF -- VAX G-style floating pt: UNUSED
245 -- | DF -- VAX D-style floating pt: UNUSED
246 -- | SF -- IEEE single-precision floating pt: UNUSED
247 | TF -- IEEE double-precision floating pt
251 | Bu -- byte (unsigned)
253 | Wu -- word (unsigned)
254 | L -- longword (signed)
255 | Lu -- longword (unsigned)
256 | F -- IEEE single-precision floating pt
257 | DF -- IEEE double-precision floating pt
258 | F80 -- Intel 80-bit internal FP format; only used for spilling
260 #if sparc_TARGET_ARCH
262 | Bu -- byte (unsigned)
263 | W -- word (4 bytes)
264 | F -- IEEE single-precision floating pt
265 | DF -- IEEE double-precision floating pt
268 primRepToSize :: PrimRep -> Size
270 primRepToSize PtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
271 primRepToSize CodePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
272 primRepToSize DataPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
273 primRepToSize RetRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
274 primRepToSize CostCentreRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
275 primRepToSize CharRep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
277 primRepToSize Int8Rep = IF_ARCH_alpha(B, IF_ARCH_i386(B, IF_ARCH_sparc(B, )))
278 primRepToSize Int16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(W, IF_ARCH_sparc(err,)))
279 where err = panic "primRepToSize Int16Rep"
280 primRepToSize Int32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
281 primRepToSize Word8Rep = IF_ARCH_alpha(Bu, IF_ARCH_i386(Bu, IF_ARCH_sparc(Bu, )))
282 primRepToSize Word16Rep = IF_ARCH_alpha(err,IF_ARCH_i386(Wu, IF_ARCH_sparc(err,)))
283 where err = panic "primRepToSize Word16Rep"
284 primRepToSize Word32Rep = IF_ARCH_alpha(L, IF_ARCH_i386(Lu, IF_ARCH_sparc(W, )))
286 primRepToSize IntRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
287 primRepToSize WordRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
288 primRepToSize AddrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
289 primRepToSize FloatRep = IF_ARCH_alpha(TF, IF_ARCH_i386(F, IF_ARCH_sparc(F, )))
290 primRepToSize DoubleRep = IF_ARCH_alpha(TF, IF_ARCH_i386(DF, IF_ARCH_sparc(DF, )))
291 primRepToSize ArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
292 primRepToSize ByteArrayRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
293 primRepToSize PrimPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
294 primRepToSize WeakPtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
295 primRepToSize ForeignObjRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
296 primRepToSize BCORep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
297 primRepToSize StablePtrRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
298 primRepToSize ThreadIdRep = IF_ARCH_alpha(Q, IF_ARCH_i386(L, IF_ARCH_sparc(W, )))
299 -- SUP: Wrong!!! 64-bit reps are not handled; B is a placeholder, only for testing the rest of the NCG
300 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
301 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
304 %************************************************************************
306 \subsection{Machine's assembly language}
308 %************************************************************************
310 We have a few common ``instructions'' (nearly all the pseudo-ops) but
311 most of @Instr@ is machine-specific.
315 = COMMENT FAST_STRING -- comment pseudo-op
316 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
317 | LABEL CLabel -- global label pseudo-op
318 | ASCII Bool -- True <=> needs backslash conversion
319 String -- the literal string
322 | DELTA Int -- specify current stack offset for
323 -- benefit of subsequent passes
327 #if alpha_TARGET_ARCH
329 -- data Instr continues...
333 | LD Size Reg MachRegsAddr -- size, dst, src
334 | LDA Reg MachRegsAddr -- dst, src
335 | LDAH Reg MachRegsAddr -- dst, src
336 | LDGP Reg MachRegsAddr -- dst, src
337 | LDI Size Reg Imm -- size, dst, src
338 | ST Size Reg MachRegsAddr -- size, src, dst
343 | ABS Size RI Reg -- size, src, dst
344 | NEG Size Bool RI Reg -- size, overflow, src, dst
345 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
346 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
347 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
348 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
349 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
350 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
351 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
353 -- Simple bit-twiddling.
373 | CMP Cond Reg RI Reg
380 | FADD Size Reg Reg Reg
381 | FDIV Size Reg Reg Reg
382 | FMUL Size Reg Reg Reg
383 | FSUB Size Reg Reg Reg
384 | CVTxy Size Size Reg Reg
385 | FCMP Size Cond Reg Reg Reg
393 | JMP Reg MachRegsAddr Int
395 | JSR Reg MachRegsAddr Int
397 -- Alpha-specific pseudo-ops.
406 #endif {- alpha_TARGET_ARCH -}
409 Intel, in their infinite wisdom, selected a stack model for floating
410 point registers on x86. That might have made sense back in 1979 --
411 nowadays we can see it for the nonsense it really is. A stack model
412 fits poorly with the existing nativeGen infrastructure, which assumes
413 flat integer and FP register sets. Prior to this commit, nativeGen
414 could not generate correct x86 FP code -- to do so would have meant
415 somehow working the register-stack paradigm into the register
416 allocator and spiller, which sounds very difficult.
418 We have decided to cheat, and go for a simple fix which requires no
419 infrastructure modifications, at the expense of generating ropey but
420 correct FP code. All notions of the x86 FP stack and its insns have
421 been removed. Instead, we pretend (to the instruction selector and
422 register allocator) that x86 has six floating point registers, %fake0
423 .. %fake5, which can be used in the usual flat manner. We further
424 claim that x86 has floating point instructions very similar to SPARC
425 and Alpha, that is, a simple 3-operand register-register arrangement.
426 Code generation and register allocation proceed on this basis.
428 When we come to print out the final assembly, our convenient fiction
429 is converted to dismal reality. Each fake instruction is
430 independently converted to a series of real x86 instructions.
431 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
432 arithmetic operations, the two operands are pushed onto the top of the
433 FP stack, the operation done, and the result copied back into the
434 relevant register. There are only six %fake registers because 2 are
435 needed for the translation, and x86 has 8 in total.
437 The translation is inefficient but is simple and it works. A cleverer
438 translation would handle a sequence of insns, simulating the FP stack
439 contents, would not impose a fixed mapping from %fake to %st regs, and
440 hopefully could avoid most of the redundant reg-reg moves of the
443 We might as well make use of whatever unique FP facilities Intel have
444 chosen to bless us with (let's not be churlish, after all).
445 Hence GLDZ and GLD1. Bwahahahahahahaha!
447 LATER (10 Nov 2000): idiv gives problems with the register spiller,
448 because the spiller is simpleminded and because idiv has fixed uses of
449 %eax and %edx. Rather than make the spiller cleverer, we do away with
450 idiv, and instead have iquot and irem fake (integer) insns, which have
451 no operand register constraints -- ie, they behave like add, sub, mul.
452 The printer-outer transforms them to a sequence of real insns which does
453 the Right Thing (tm). As with the FP stuff, this gives ropey code,
454 but we don't care, since it doesn't get used much. We hope.
459 -- data Instr continues...
463 | MOV Size Operand Operand
464 | MOVZxL Size Operand Operand -- size is the size of operand 1
465 | MOVSxL Size Operand Operand -- size is the size of operand 1
467 -- Load effective address (also a very useful three-operand add instruction :-)
469 | LEA Size Operand Operand
473 | ADD Size Operand Operand
474 | SUB Size Operand Operand
475 | IMUL Size Operand Operand
477 -- Quotient and remainder. SEE comment above -- these are not
478 -- real x86 insns; instead they are expanded when printed
479 -- into a sequence of real insns.
481 | IQUOT Size Operand Operand
482 | IREM Size Operand Operand
484 -- Simple bit-twiddling.
486 | AND Size Operand Operand
487 | OR Size Operand Operand
488 | XOR Size Operand Operand
490 | NEGI Size Operand -- NEG instruction (name clash with Cond)
491 | SHL Size Imm Operand -- Only immediate shifts allowed
492 | SAR Size Imm Operand -- Only immediate shifts allowed
493 | SHR Size Imm Operand -- Only immediate shifts allowed
494 | BT Size Imm Operand
499 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
500 -- as single instructions right up until we spit them out.
502 -- all the 3-operand fake fp insns are src1 src2 dst
503 -- and furthermore are constrained to be fp regs only.
504 -- IMPORTANT: keep is_G_instr up to date with any changes here
505 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
506 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
507 | GST Size Reg MachRegsAddr -- src(fpreg), dst
509 | GLDZ Reg -- dst(fpreg)
510 | GLD1 Reg -- dst(fpreg)
512 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
513 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
515 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
516 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
518 | GITOF Reg Reg -- src(intreg), dst(fpreg)
519 | GITOD Reg Reg -- src(intreg), dst(fpreg)
521 | GADD Size Reg Reg Reg -- src1, src2, dst
522 | GDIV Size Reg Reg Reg -- src1, src2, dst
523 | GSUB Size Reg Reg Reg -- src1, src2, dst
524 | GMUL Size Reg Reg Reg -- src1, src2, dst
526 | GCMP Size Reg Reg -- src1, src2
528 | GABS Size Reg Reg -- src, dst
529 | GNEG Size Reg Reg -- src, dst
530 | GSQRT Size Reg Reg -- src, dst
531 | GSIN Size Reg Reg -- src, dst
532 | GCOS Size Reg Reg -- src, dst
533 | GTAN Size Reg Reg -- src, dst
535 | GFREE -- do ffree on all x86 regs; an ugly hack
538 | TEST Size Operand Operand
539 | CMP Size Operand Operand
551 | JMP DestInfo Operand -- possible dests, target
552 | JXX Cond CLabel -- target
557 | CLTD -- sign extend %eax into %edx:%eax
560 = OpReg Reg -- register
561 | OpImm Imm -- immediate value
562 | OpAddr MachRegsAddr -- memory reference
565 i386_insert_ffrees :: [Instr] -> [Instr]
566 i386_insert_ffrees insns
567 | any is_G_instr insns
568 = concatMap ffree_before_nonlocal_transfers insns
572 ffree_before_nonlocal_transfers insn
574 CALL _ -> [GFREE, insn]
575 -- Jumps to immediate labels are local
576 JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
577 -- If a jump mentions dests, it is a local jump thru
579 JMP (DestInfo _) _ -> [insn]
580 JMP _ _ -> [GFREE, insn]
584 -- if you ever add a new FP insn to the fake x86 FP insn set,
585 -- you must update this too
586 is_G_instr :: Instr -> Bool
589 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
590 GLDZ _ -> True; GLD1 _ -> True;
591 GFTOD _ _ -> True; GFTOI _ _ -> True;
592 GDTOF _ _ -> True; GDTOI _ _ -> True;
593 GITOF _ _ -> True; GITOD _ _ -> True;
594 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
595 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
596 GCMP _ _ _ -> True; GABS _ _ _ -> True
597 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
598 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
599 GFREE -> panic "is_G_instr: GFREE (!)"
602 #endif {- i386_TARGET_ARCH -}
606 #if sparc_TARGET_ARCH
608 -- data Instr continues...
612 | LD Size MachRegsAddr Reg -- size, src, dst
613 | ST Size Reg MachRegsAddr -- size, src, dst
617 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
618 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
620 -- Simple bit-twiddling.
622 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
623 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
624 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
625 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
626 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
627 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
628 | SLL Reg RI Reg -- src1, src2, dst
629 | SRL Reg RI Reg -- src1, src2, dst
630 | SRA Reg RI Reg -- src1, src2, dst
631 | SETHI Imm Reg -- src, dst
632 | NOP -- Really SETHI 0, %g0, but worth an alias
636 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
637 -- right up until we spit them out.
639 | FABS Size Reg Reg -- src dst
640 | FADD Size Reg Reg Reg -- src1, src2, dst
641 | FCMP Bool Size Reg Reg -- exception?, src1, src2
642 | FDIV Size Reg Reg Reg -- src1, src2, dst
643 | FMOV Size Reg Reg -- src, dst
644 | FMUL Size Reg Reg Reg -- src1, src2, dst
645 | FNEG Size Reg Reg -- src, dst
646 | FSQRT Size Reg Reg -- src, dst
647 | FSUB Size Reg Reg Reg -- src1, src2, dst
648 | FxTOy Size Size Reg Reg -- src, dst
652 | BI Cond Bool Imm -- cond, annul?, target
653 | BF Cond Bool Imm -- cond, annul?, target
655 | JMP DestInfo MachRegsAddr -- target
656 | CALL Imm Int Bool -- target, args, terminal
663 riZero (RIImm (ImmInt 0)) = True
664 riZero (RIImm (ImmInteger 0)) = True
665 riZero (RIReg (RealReg 0)) = True
668 -- Calculate the effective address which would be used by the
669 -- corresponding fpRel sequence. fpRel itself is in MachRegs.lhs;
670 -- alas, fpRelEA cannot live there too because of module dependencies.
671 fpRelEA :: Int -> Reg -> Instr
673 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
675 -- Code to shift the stack pointer by n words.
676 moveSp :: Int -> Instr
678 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
680 -- Produce the second-half-of-a-double register given the first half.
682 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
683 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
684 #endif {- sparc_TARGET_ARCH -}