2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 -- #include "config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import SMRep ( SMRep(..) )
54 import Stix ( StixTree(..), StixReg(..), CodeSegment )
55 import Panic ( panic )
56 import Char ( isDigit )
57 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
58 import Outputable ( text, pprPanic, ppr )
59 import IOExts ( trace )
63 underscorePrefix :: Bool -- leading underscore on assembler labels?
65 #ifdef LEADING_UNDERSCORE
66 underscorePrefix = True
68 underscorePrefix = False
71 ---------------------------
72 fmtAsmLbl :: String -> String -- for formatting labels
76 {- The alpha assembler likes temporary labels to look like $L123
77 instead of L123. (Don't toss the L, because then Lf28
86 % ----------------------------------------------------------------
88 We (allegedly) put the first six C-call arguments in registers;
89 where do we start putting the rest of them?
91 eXTRA_STK_ARGS_HERE :: Int
93 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
96 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
98 Size of a @PrimRep@, in bytes.
101 sizeOf :: PrimRep -> Integer{-in bytes-}
102 -- the result is an Integer only because it's more convenient; each arch-specific arm below maps the Size chosen by primRepToSize to its width in bytes
104 sizeOf pr = case (primRepToSize pr) of
105 IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},)
106 IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
107 IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
110 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
112 Now the volatile saves and restores. We add the basic guys to the
113 list of ``user'' registers provided. Note that there are more basic
114 registers on the restore list, because some are reloaded from
117 (@volatileRestores@ used only for wrapper-hungry PrimOps. Note that the code below currently uses the same candidate list for restores as for saves.)
120 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
122 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
123 restore_cands = save_cands
126 = map save ((filter callerSaves) (save_cands ++ vols))
128 save x = StAssign (magicIdPrimRep x) loc reg
130 reg = StReg (StixMagicId x)
131 loc = case stgReg x of
133 Always _ -> panic "volatileSaves"
135 volatileRestores vols
136 = map restore ((filter callerSaves) (restore_cands ++ vols))
138 restore x = StAssign (magicIdPrimRep x) reg loc
140 reg = StReg (StixMagicId x)
141 loc = case stgReg x of
143 Always _ -> panic "volatileRestores"
146 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
148 Obviously slightly weedy
149 (Note that the floating point values aren't terribly important.)
152 targetMinDouble = MachDouble (-1.7976931348623157e+308)
153 targetMaxDouble = MachDouble (1.7976931348623157e+308)
154 targetMinInt = mkMachInt (-2147483648)
155 targetMaxInt = mkMachInt 2147483647
158 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
160 This algorithm for determining the $\log_2$ of exact powers of 2 comes
161 from GCC. It requires bit manipulation primitives, and we use GHC
168 exactLog2 :: Integer -> Maybe Integer
170 = if (x <= 0 || x >= 2147483648) then
173 case (fromInteger x) of { I# x# ->
174 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
177 Just (toInteger (I# (pow2 x#)))
180 shiftr x y = shiftRL# x y
182 pow2 x# | x# ==# 1# = 0#
183 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
186 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
190 #if alpha_TARGET_ARCH
191 = ALWAYS -- For BI (same as BR)
192 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
194 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
195 | LE -- For CMP and BI
196 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
198 | NEVER -- For BI (null instruction)
199 | ULE -- For CMP only
200 | ULT -- For CMP only
203 = ALWAYS -- What's really used? ToDo
217 #if sparc_TARGET_ARCH
218 = ALWAYS -- What's really used? ToDo
239 #if alpha_TARGET_ARCH
242 -- | W -- word (2 bytes): UNUSED
244 -- | L -- longword (4 bytes): UNUSED
245 | Q -- quadword (8 bytes)
246 -- | FF -- VAX F-style floating pt: UNUSED
247 -- | GF -- VAX G-style floating pt: UNUSED
248 -- | DF -- VAX D-style floating pt: UNUSED
249 -- | SF -- IEEE single-precision floating pt: UNUSED
250 | TF -- IEEE double-precision floating pt
254 -- | HB -- higher byte **UNUSED**
257 | F -- IEEE single-precision floating pt
258 | DF -- IEEE double-precision floating pt
259 | F80 -- Intel 80-bit internal FP format; only used for spilling
261 #if sparc_TARGET_ARCH
263 | BU -- byte (unsigned)
264 -- | HW -- halfword, 2 bytes (signed): UNUSED
265 -- | HWU -- halfword, 2 bytes (unsigned): UNUSED
267 -- | D -- doubleword, 8 bytes: UNUSED
268 | F -- IEEE single-precision floating pt
269 | DF -- IEEE double-precision floating pt
272 primRepToSize :: PrimRep -> Size
274 primRepToSize PtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
275 primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
276 primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
277 primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
278 primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
279 primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,)))
280 primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
281 primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
282 primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
283 primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( F ,)))
284 primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,)))
285 primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
286 primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
287 primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
288 primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
289 primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
290 primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
291 -- SUP: Wrong!!! The 64-bit reps below are mapped to B (which sizeOf treats as one byte) as a placeholder, and each use emits a trace message. Only for testing the rest of the NCG
292 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
293 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
296 %************************************************************************
298 \subsection{Machine's assembly language}
300 %************************************************************************
302 We have a few common ``instructions'' (nearly all the pseudo-ops) but
303 mostly all of @Instr@ is machine-specific.
307 = COMMENT FAST_STRING -- comment pseudo-op
308 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
309 | LABEL CLabel -- global label pseudo-op
310 | ASCII Bool -- True <=> needs backslash conversion
311 String -- the literal string
314 | DELTA Int -- specify current stack offset for
315 -- benefit of subsequent passes
319 #if alpha_TARGET_ARCH
321 -- data Instr continues...
325 | LD Size Reg MachRegsAddr -- size, dst, src
326 | LDA Reg MachRegsAddr -- dst, src
327 | LDAH Reg MachRegsAddr -- dst, src
328 | LDGP Reg MachRegsAddr -- dst, src
329 | LDI Size Reg Imm -- size, dst, src
330 | ST Size Reg MachRegsAddr -- size, src, dst
335 | ABS Size RI Reg -- size, src, dst
336 | NEG Size Bool RI Reg -- size, overflow, src, dst
337 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
338 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
339 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
340 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
341 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
342 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
343 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
345 -- Simple bit-twiddling.
365 | CMP Cond Reg RI Reg
372 | FADD Size Reg Reg Reg
373 | FDIV Size Reg Reg Reg
374 | FMUL Size Reg Reg Reg
375 | FSUB Size Reg Reg Reg
376 | CVTxy Size Size Reg Reg
377 | FCMP Size Cond Reg Reg Reg
385 | JMP Reg MachRegsAddr Int
387 | JSR Reg MachRegsAddr Int
389 -- Alpha-specific pseudo-ops.
398 #endif {- alpha_TARGET_ARCH -}
401 Intel, in their infinite wisdom, selected a stack model for floating
402 point registers on x86. That might have made sense back in 1979 --
403 nowadays we can see it for the nonsense it really is. A stack model
404 fits poorly with the existing nativeGen infrastructure, which assumes
405 flat integer and FP register sets. Prior to this commit, nativeGen
406 could not generate correct x86 FP code -- to do so would have meant
407 somehow working the register-stack paradigm into the register
408 allocator and spiller, which sounds very difficult.
410 We have decided to cheat, and go for a simple fix which requires no
411 infrastructure modifications, at the expense of generating ropey but
412 correct FP code. All notions of the x86 FP stack and its insns have
413 been removed. Instead, we pretend (to the instruction selector and
414 register allocator) that x86 has six floating point registers, %fake0
415 .. %fake5, which can be used in the usual flat manner. We further
416 claim that x86 has floating point instructions very similar to SPARC
417 and Alpha, that is, a simple 3-operand register-register arrangement.
418 Code generation and register allocation proceed on this basis.
420 When we come to print out the final assembly, our convenient fiction
421 is converted to dismal reality. Each fake instruction is
422 independently converted to a series of real x86 instructions.
423 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
424 arithmetic operations, the two operands are pushed onto the top of the
425 FP stack, the operation done, and the result copied back into the
426 relevant register. There are only six %fake registers because 2 are
427 needed for the translation, and x86 has 8 in total.
429 The translation is inefficient but is simple and it works. A cleverer
430 translation would handle a sequence of insns, simulating the FP stack
431 contents, would not impose a fixed mapping from %fake to %st regs, and
432 hopefully could avoid most of the redundant reg-reg moves of the
435 We might as well make use of whatever unique FP facilities Intel have
436 chosen to bless us with (let's not be churlish, after all).
437 Hence GLDZ and GLD1. Bwahahahahahahaha!
442 -- data Instr continues...
446 | MOV Size Operand Operand
447 | MOVZxL Size Operand Operand -- size is the size of operand 1
448 | MOVSxL Size Operand Operand -- size is the size of operand 1
450 -- Load effective address (also a very useful three-operand add instruction :-)
452 | LEA Size Operand Operand
456 | ADD Size Operand Operand
457 | SUB Size Operand Operand
459 -- Multiplication (signed and unsigned), Division (signed and unsigned),
460 -- result in %eax, %edx.
462 | IMUL Size Operand Operand
465 -- Simple bit-twiddling.
467 | AND Size Operand Operand
468 | OR Size Operand Operand
469 | XOR Size Operand Operand
471 | NEGI Size Operand -- NEG instruction (name clash with Cond)
472 | SHL Size Imm Operand -- Only immediate shifts allowed
473 | SAR Size Imm Operand -- Only immediate shifts allowed
474 | SHR Size Imm Operand -- Only immediate shifts allowed
475 | BT Size Imm Operand
480 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
481 -- as single instructions right up until we spit them out.
483 -- all the 3-operand fake fp insns are src1 src2 dst
484 -- and furthermore are constrained to be fp regs only.
485 -- IMPORTANT: keep is_G_instr up to date with any changes here
486 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
487 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
488 | GST Size Reg MachRegsAddr -- src(fpreg), dst
490 | GLDZ Reg -- dst(fpreg)
491 | GLD1 Reg -- dst(fpreg)
493 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
494 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
496 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
497 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
499 | GITOF Reg Reg -- src(intreg), dst(fpreg)
500 | GITOD Reg Reg -- src(intreg), dst(fpreg)
502 | GADD Size Reg Reg Reg -- src1, src2, dst
503 | GDIV Size Reg Reg Reg -- src1, src2, dst
504 | GSUB Size Reg Reg Reg -- src1, src2, dst
505 | GMUL Size Reg Reg Reg -- src1, src2, dst
507 | GCMP Size Reg Reg -- src1, src2
509 | GABS Size Reg Reg -- src, dst
510 | GNEG Size Reg Reg -- src, dst
511 | GSQRT Size Reg Reg -- src, dst
512 | GSIN Size Reg Reg -- src, dst
513 | GCOS Size Reg Reg -- src, dst
514 | GTAN Size Reg Reg -- src, dst
516 | GFREE -- do ffree on all x86 regs; an ugly hack
519 | TEST Size Operand Operand
520 | CMP Size Operand Operand
532 | JMP Operand -- target
533 | JXX Cond CLabel -- target
538 | CLTD -- sign extend %eax into %edx:%eax
541 = OpReg Reg -- register
542 | OpImm Imm -- immediate value
543 | OpAddr MachRegsAddr -- memory reference
546 i386_insert_ffrees :: [Instr] -> [Instr]
547 i386_insert_ffrees insns
548 | any is_G_instr insns
549 = concatMap ffree_before_nonlocal_transfers insns
553 ffree_before_nonlocal_transfers insn
555 CALL _ -> [GFREE, insn]
556 JMP (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
557 JMP _ -> [GFREE, insn]
561 -- if you ever add a new FP insn to the fake x86 FP insn set,
562 -- you must update this too
563 is_G_instr :: Instr -> Bool
566 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
567 GLDZ _ -> True; GLD1 _ -> True;
568 GFTOD _ _ -> True; GFTOI _ _ -> True;
569 GDTOF _ _ -> True; GDTOI _ _ -> True;
570 GITOF _ _ -> True; GITOD _ _ -> True;
571 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
572 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
573 GCMP _ _ _ -> True; GABS _ _ _ -> True
574 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
575 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
576 GFREE -> panic "is_G_instr: GFREE (!)"
579 #endif {- i386_TARGET_ARCH -}
583 #if sparc_TARGET_ARCH
585 -- data Instr continues...
589 | LD Size MachRegsAddr Reg -- size, src, dst
590 | ST Size Reg MachRegsAddr -- size, src, dst
594 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
595 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
597 -- Simple bit-twiddling.
599 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
600 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
601 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
602 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
603 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
604 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
605 | SLL Reg RI Reg -- src1, src2, dst
606 | SRL Reg RI Reg -- src1, src2, dst
607 | SRA Reg RI Reg -- src1, src2, dst
608 | SETHI Imm Reg -- src, dst
609 | NOP -- Really SETHI 0, %g0, but worth an alias
613 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
614 -- right up until we spit them out.
616 | FABS Size Reg Reg -- src dst
617 | FADD Size Reg Reg Reg -- src1, src2, dst
618 | FCMP Bool Size Reg Reg -- exception?, size, src1, src2
619 | FDIV Size Reg Reg Reg -- src1, src2, dst
620 | FMOV Size Reg Reg -- src, dst
621 | FMUL Size Reg Reg Reg -- src1, src2, dst
622 | FNEG Size Reg Reg -- src, dst
623 | FSQRT Size Reg Reg -- src, dst
624 | FSUB Size Reg Reg Reg -- src1, src2, dst
625 | FxTOy Size Size Reg Reg -- src, dst
629 | BI Cond Bool Imm -- cond, annul?, target
630 | BF Cond Bool Imm -- cond, annul?, target
632 | JMP MachRegsAddr -- target
633 | CALL Imm Int Bool -- target, args, terminal
640 riZero (RIImm (ImmInt 0)) = True
641 riZero (RIImm (ImmInteger 0)) = True
642 riZero (RIReg (RealReg 0)) = True
645 -- Calculate the effective address which would be used by the
646 -- corresponding fpRel sequence. fpRel itself is in MachRegs.lhs;
647 -- alas, fpRelEA can't live there as well because of module dependencies.
648 fpRelEA :: Int -> Reg -> Instr
650 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
652 -- Code to shift the stack pointer by n words.
653 moveSp :: Int -> Instr
655 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
657 -- Produce the second-half-of-a-double register given the first half.
659 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
660 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
661 #endif {- sparc_TARGET_ARCH -}