2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
34 RI(..), riZero, fpRelEA, moveSp, fPair
38 #include "HsVersions.h"
39 -- #include "config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
48 # if sparc_TARGET_ARCH
52 import PrimRep ( PrimRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment )
54 import Panic ( panic )
55 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
56 import Outputable ( pprPanic, ppr )
57 import IOExts ( trace )
61 underscorePrefix :: Bool -- leading underscore on assembler labels?
63 #ifdef LEADING_UNDERSCORE
64 underscorePrefix = True
66 underscorePrefix = False
69 ---------------------------
70 fmtAsmLbl :: String -> String -- for formatting labels
74 {- The alpha assembler likes temporary labels to look like $L123
75 instead of L123. (Don't toss the L, because then Lf28
84 % ----------------------------------------------------------------
86 We (allegedly) put the first six C-call arguments in registers;
87 where do we start putting the rest of them?
89 eXTRA_STK_ARGS_HERE :: Int
91 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
94 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96 Size of a @PrimRep@, in bytes.
99 sizeOf :: PrimRep -> Integer{-in bytes-}
100 -- the result is an Integer only because it's more convenient
102 sizeOf pr = case (primRepToSize pr) of
103 IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},)
104 IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
105 IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
108 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
110 Now the volatile saves and restores. We add the basic guys to the
111 list of ``user'' registers provided. Note that there are more basic
112 registers on the restore list, because some are reloaded from
115 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
118 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
120 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
121 restore_cands = save_cands
124 = map save ((filter callerSaves) (save_cands ++ vols))
126 save x = StAssign (magicIdPrimRep x) loc reg
128 reg = StReg (StixMagicId x)
129 loc = case stgReg x of
131 Always _ -> panic "volatileSaves"
133 volatileRestores vols
134 = map restore ((filter callerSaves) (restore_cands ++ vols))
136 restore x = StAssign (magicIdPrimRep x) reg loc
138 reg = StReg (StixMagicId x)
139 loc = case stgReg x of
141 Always _ -> panic "volatileRestores"
144 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
146 Obviously slightly weedy
147 (Note that the floating point values aren't terribly important.)
150 targetMinDouble = MachDouble (-1.7976931348623157e+308)
151 targetMaxDouble = MachDouble (1.7976931348623157e+308)
152 targetMinInt = mkMachInt (-2147483648)
153 targetMaxInt = mkMachInt 2147483647
156 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
158 This algorithm for determining the $\log_2$ of exact powers of 2 comes
159 from GCC. It requires bit manipulation primitives, and we use GHC
166 exactLog2 :: Integer -> Maybe Integer
168 = if (x <= 0 || x >= 2147483648) then
171 case (fromInteger x) of { I# x# ->
172 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
175 Just (toInteger (I# (pow2 x#)))
178 shiftr x y = shiftRL# x y
180 pow2 x# | x# ==# 1# = 0#
181 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
184 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
188 #if alpha_TARGET_ARCH
189 = ALWAYS -- For BI (same as BR)
190 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
192 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
193 | LE -- For CMP and BI
194 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
196 | NEVER -- For BI (null instruction)
197 | ULE -- For CMP only
198 | ULT -- For CMP only
201 = ALWAYS -- What's really used? ToDo
215 #if sparc_TARGET_ARCH
216 = ALWAYS -- What's really used? ToDo
237 #if alpha_TARGET_ARCH
240 -- | W -- word (2 bytes): UNUSED
242 -- | L -- longword (4 bytes): UNUSED
243 | Q -- quadword (8 bytes)
244 -- | FF -- VAX F-style floating pt: UNUSED
245 -- | GF -- VAX G-style floating pt: UNUSED
246 -- | DF -- VAX D-style floating pt: UNUSED
247 -- | SF -- IEEE single-precision floating pt: UNUSED
248 | TF -- IEEE double-precision floating pt
252 -- | HB -- higher byte **UNUSED**
255 | F -- IEEE single-precision floating pt
256 | DF -- IEEE single-precision floating pt
257 | F80 -- Intel 80-bit internal FP format; only used for spilling
259 #if sparc_TARGET_ARCH
261 | BU -- byte (unsigned)
262 -- | HW -- halfword, 2 bytes (signed): UNUSED
263 -- | HWU -- halfword, 2 bytes (unsigned): UNUSED
265 -- | D -- doubleword, 8 bytes: UNUSED
266 | F -- IEEE single-precision floating pt
267 | DF -- IEEE single-precision floating pt
270 primRepToSize :: PrimRep -> Size
272 primRepToSize PtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
273 primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
274 primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
275 primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
276 primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
277 primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,)))
278 primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
279 primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
280 primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
281 primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( F ,)))
282 primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,)))
283 primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
284 primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
285 primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
286 primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
287 primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
288 primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
289 -- SUP: Wrong!!! Only for testing the rest of the NCG
290 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
291 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
294 %************************************************************************
296 \subsection{Machine's assembly language}
298 %************************************************************************
300 We have a few common ``instructions'' (nearly all the pseudo-ops) but
301 mostly all of @Instr@ is machine-specific.
305 = COMMENT FAST_STRING -- comment pseudo-op
306 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
307 | LABEL CLabel -- global label pseudo-op
308 | ASCII Bool -- True <=> needs backslash conversion
309 String -- the literal string
312 | DELTA Int -- specify current stack offset for
313 -- benefit of subsequent passes
317 #if alpha_TARGET_ARCH
319 -- data Instr continues...
323 | LD Size Reg MachRegsAddr -- size, dst, src
324 | LDA Reg MachRegsAddr -- dst, src
325 | LDAH Reg MachRegsAddr -- dst, src
326 | LDGP Reg MachRegsAddr -- dst, src
327 | LDI Size Reg Imm -- size, dst, src
328 | ST Size Reg MachRegsAddr -- size, src, dst
333 | ABS Size RI Reg -- size, src, dst
334 | NEG Size Bool RI Reg -- size, overflow, src, dst
335 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
336 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
337 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
338 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
339 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
340 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
341 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
343 -- Simple bit-twiddling.
363 | CMP Cond Reg RI Reg
370 | FADD Size Reg Reg Reg
371 | FDIV Size Reg Reg Reg
372 | FMUL Size Reg Reg Reg
373 | FSUB Size Reg Reg Reg
374 | CVTxy Size Size Reg Reg
375 | FCMP Size Cond Reg Reg Reg
383 | JMP Reg MachRegsAddr Int
385 | JSR Reg MachRegsAddr Int
387 -- Alpha-specific pseudo-ops.
396 #endif {- alpha_TARGET_ARCH -}
399 Intel, in their infinite wisdom, selected a stack model for floating
400 point registers on x86. That might have made sense back in 1979 --
401 nowadays we can see it for the nonsense it really is. A stack model
402 fits poorly with the existing nativeGen infrastructure, which assumes
403 flat integer and FP register sets. Prior to this commit, nativeGen
404 could not generate correct x86 FP code -- to do so would have meant
405 somehow working the register-stack paradigm into the register
406 allocator and spiller, which sounds very difficult.
408 We have decided to cheat, and go for a simple fix which requires no
409 infrastructure modifications, at the expense of generating ropey but
410 correct FP code. All notions of the x86 FP stack and its insns have
411 been removed. Instead, we pretend (to the instruction selector and
412 register allocator) that x86 has six floating point registers, %fake0
413 .. %fake5, which can be used in the usual flat manner. We further
414 claim that x86 has floating point instructions very similar to SPARC
415 and Alpha, that is, a simple 3-operand register-register arrangement.
416 Code generation and register allocation proceed on this basis.
418 When we come to print out the final assembly, our convenient fiction
419 is converted to dismal reality. Each fake instruction is
420 independently converted to a series of real x86 instructions.
421 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
422 arithmetic operations, the two operands are pushed onto the top of the
423 FP stack, the operation done, and the result copied back into the
424 relevant register. There are only six %fake registers because 2 are
425 needed for the translation, and x86 has 8 in total.
427 The translation is inefficient but is simple and it works. A cleverer
428 translation would handle a sequence of insns, simulating the FP stack
429 contents, would not impose a fixed mapping from %fake to %st regs, and
430 hopefully could avoid most of the redundant reg-reg moves of the
433 We might as well make use of whatever unique FP facilities Intel have
434 chosen to bless us with (let's not be churlish, after all).
435 Hence GLDZ and GLD1. Bwahahahahahahaha!
440 -- data Instr continues...
444 | MOV Size Operand Operand
445 | MOVZxL Size Operand Operand -- size is the size of operand 1
446 | MOVSxL Size Operand Operand -- size is the size of operand 1
448 -- Load effective address (also a very useful three-operand add instruction :-)
450 | LEA Size Operand Operand
454 | ADD Size Operand Operand
455 | SUB Size Operand Operand
457 -- Multiplication (signed and unsigned), Division (signed and unsigned),
458 -- result in %eax, %edx.
460 | IMUL Size Operand Operand
463 -- Simple bit-twiddling.
465 | AND Size Operand Operand
466 | OR Size Operand Operand
467 | XOR Size Operand Operand
469 | NEGI Size Operand -- NEG instruction (name clash with Cond)
470 | SHL Size Imm Operand -- Only immediate shifts allowed
471 | SAR Size Imm Operand -- Only immediate shifts allowed
472 | SHR Size Imm Operand -- Only immediate shifts allowed
473 | BT Size Imm Operand
478 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
479 -- as single instructions right up until we spit them out.
481 -- all the 3-operand fake fp insns are src1 src2 dst
482 -- and furthermore are constrained to be fp regs only.
483 -- IMPORTANT: keep is_G_insn up to date with any changes here
484 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
485 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
486 | GST Size Reg MachRegsAddr -- src(fpreg), dst
488 | GLDZ Reg -- dst(fpreg)
489 | GLD1 Reg -- dst(fpreg)
491 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
492 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
494 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
495 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
497 | GITOF Reg Reg -- src(intreg), dst(fpreg)
498 | GITOD Reg Reg -- src(intreg), dst(fpreg)
500 | GADD Size Reg Reg Reg -- src1, src2, dst
501 | GDIV Size Reg Reg Reg -- src1, src2, dst
502 | GSUB Size Reg Reg Reg -- src1, src2, dst
503 | GMUL Size Reg Reg Reg -- src1, src2, dst
505 | GCMP Size Reg Reg -- src1, src2
507 | GABS Size Reg Reg -- src, dst
508 | GNEG Size Reg Reg -- src, dst
509 | GSQRT Size Reg Reg -- src, dst
510 | GSIN Size Reg Reg -- src, dst
511 | GCOS Size Reg Reg -- src, dst
512 | GTAN Size Reg Reg -- src, dst
514 | GFREE -- do ffree on all x86 regs; an ugly hack
517 | TEST Size Operand Operand
518 | CMP Size Operand Operand
530 | JMP Operand -- target
531 | JXX Cond CLabel -- target
536 | CLTD -- sign extend %eax into %edx:%eax
539 = OpReg Reg -- register
540 | OpImm Imm -- immediate value
541 | OpAddr MachRegsAddr -- memory reference
544 i386_insert_ffrees :: [Instr] -> [Instr]
545 i386_insert_ffrees insns
546 | any is_G_instr insns
547 = concatMap ffree_before_nonlocal_transfers insns
551 ffree_before_nonlocal_transfers insn
553 CALL _ -> [GFREE, insn]
554 JMP (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
555 JMP _ -> [GFREE, insn]
559 -- if you ever add a new FP insn to the fake x86 FP insn set,
560 -- you must update this too
561 is_G_instr :: Instr -> Bool
564 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
565 GLDZ _ -> True; GLD1 _ -> True;
566 GFTOD _ _ -> True; GFTOI _ _ -> True;
567 GDTOF _ _ -> True; GDTOI _ _ -> True;
568 GITOF _ _ -> True; GITOD _ _ -> True;
569 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
570 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
571 GCMP _ _ _ -> True; GABS _ _ _ -> True
572 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
573 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
574 GFREE -> panic "is_G_instr: GFREE (!)"
577 #endif {- i386_TARGET_ARCH -}
581 #if sparc_TARGET_ARCH
583 -- data Instr continues...
587 | LD Size MachRegsAddr Reg -- size, src, dst
588 | ST Size Reg MachRegsAddr -- size, src, dst
592 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
593 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
595 -- Simple bit-twiddling.
597 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
598 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
599 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
600 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
601 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
602 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
603 | SLL Reg RI Reg -- src1, src2, dst
604 | SRL Reg RI Reg -- src1, src2, dst
605 | SRA Reg RI Reg -- src1, src2, dst
606 | SETHI Imm Reg -- src, dst
607 | NOP -- Really SETHI 0, %g0, but worth an alias
611 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
612 -- right up until we spit them out.
614 | FABS Size Reg Reg -- src dst
615 | FADD Size Reg Reg Reg -- src1, src2, dst
616 | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
617 | FDIV Size Reg Reg Reg -- src1, src2, dst
618 | FMOV Size Reg Reg -- src, dst
619 | FMUL Size Reg Reg Reg -- src1, src2, dst
620 | FNEG Size Reg Reg -- src, dst
621 | FSQRT Size Reg Reg -- src, dst
622 | FSUB Size Reg Reg Reg -- src1, src2, dst
623 | FxTOy Size Size Reg Reg -- src, dst
627 | BI Cond Bool Imm -- cond, annul?, target
628 | BF Cond Bool Imm -- cond, annul?, target
630 | JMP MachRegsAddr -- target
631 | CALL Imm Int Bool -- target, args, terminal
638 riZero (RIImm (ImmInt 0)) = True
639 riZero (RIImm (ImmInteger 0)) = True
640 riZero (RIReg (RealReg 0)) = True
643 -- Calculate the effective address which would be used by the
644 -- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
645 -- alas -- can't have fpRelEA here because of module dependencies.
646 fpRelEA :: Int -> Reg -> Instr
648 = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
650 -- Code to shift the stack pointer by n words.
651 moveSp :: Int -> Instr
653 = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
655 -- Produce the second-half-of-a-double register given the first half.
657 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1)
658 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
659 #endif {- sparc_TARGET_ARCH -}