2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 stixFor_stdout, stixFor_stderr, stixFor_stdin,
25 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
28 IF_ARCH_i386(i386_insert_ffrees COMMA,)
40 #include "HsVersions.h"
41 -- #include "config.h"
43 import AbsCSyn ( MagicId(..) )
44 import AbsCUtils ( magicIdPrimRep )
45 import CLabel ( CLabel, isAsmTemp )
46 import Literal ( mkMachInt, Literal(..) )
47 import MachRegs ( stgReg, callerSaves, RegLoc(..),
51 import PrimRep ( PrimRep(..) )
52 import SMRep ( SMRep(..) )
53 import Stix ( StixTree(..), StixReg(..), CodeSegment )
54 import Panic ( panic )
55 import Char ( isDigit )
56 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
57 import Outputable ( text )
58 import IOExts ( trace )
62 underscorePrefix :: Bool -- leading underscore on assembler labels?
64 #ifdef LEADING_UNDERSCORE
65 underscorePrefix = True
67 underscorePrefix = False
70 ---------------------------
71 fmtAsmLbl :: String -> String -- for formatting labels
75 {- The alpha assembler likes temporary labels to look like $L123
76 instead of L123. (Don't toss the L, because then Lf28
84 ---------------------------
85 stixFor_stdout, stixFor_stderr, stixFor_stdin :: StixTree
87 -- Linux glibc 2 / libc6
88 stixFor_stdout = StInd PtrRep (StLitLbl (text "stdout"))
89 stixFor_stderr = StInd PtrRep (StLitLbl (text "stderr"))
90 stixFor_stdin = StInd PtrRep (StLitLbl (text "stdin"))
94 stixFor_stdout = error "stixFor_stdout: not implemented for Alpha"
95 stixFor_stderr = error "stixFor_stderr: not implemented for Alpha"
96 stixFor_stdin = error "stixFor_stdin: not implemented for Alpha"
100 stixFor_stdout = error "stixFor_stdout: not implemented for Sparc"
101 stixFor_stderr = error "stixFor_stderr: not implemented for Sparc"
102 stixFor_stdin = error "stixFor_stdin: not implemented for Sparc"
106 Here's some old stuff from which it shouldn't be too hard to
107 implement the above for Alpha/Sparc.
109 cvtLitLit :: String -> String
112 -- Rather than relying on guessing, use FILE_SIZE to compute the
115 cvtLitLit "stdin" = IF_ARCH_alpha("_iob+0" {-probably OK...-}
116 ,IF_ARCH_i386("stdin"
117 ,IF_ARCH_sparc("__iob+0x0"{-probably OK...-}
120 cvtLitLit "stdout" = IF_ARCH_alpha("_iob+"++show (``FILE_SIZE''::Int)
121 ,IF_ARCH_i386("stdout"
122 ,IF_ARCH_sparc("__iob+"++show (``FILE_SIZE''::Int)
124 cvtLitLit "stderr" = IF_ARCH_alpha("_iob+"++show (2*(``FILE_SIZE''::Int))
125 ,IF_ARCH_i386("stderr"
126 ,IF_ARCH_sparc("__iob+"++show (2*(``FILE_SIZE''::Int))
132 % ----------------------------------------------------------------
134 We (allegedly) put the first six C-call arguments in registers;
135 where do we start putting the rest of them?
137 eXTRA_STK_ARGS_HERE :: Int
139 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
142 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
144 Size of a @PrimRep@, in bytes.
147 sizeOf :: PrimRep -> Integer{-in bytes-}
148 -- the result is an Integer only because it's more convenient
150 sizeOf pr = case (primRepToSize pr) of
151 IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},)
152 IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
153 IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
156 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
158 Now the volatile saves and restores. We add the basic guys to the
159 list of ``user'' registers provided. Note that there are more basic
160 registers on the restore list, because some are reloaded from
163 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
166 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
168 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
169 restore_cands = save_cands
172 = map save ((filter callerSaves) (save_cands ++ vols))
174 save x = StAssign (magicIdPrimRep x) loc reg
176 reg = StReg (StixMagicId x)
177 loc = case stgReg x of
179 Always _ -> panic "volatileSaves"
181 volatileRestores vols
182 = map restore ((filter callerSaves) (restore_cands ++ vols))
184 restore x = StAssign (magicIdPrimRep x) reg loc
186 reg = StReg (StixMagicId x)
187 loc = case stgReg x of
189 Always _ -> panic "volatileRestores"
192 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
194 Obviously slightly weedy
195 (Note that the floating point values aren't terribly important.)
198 targetMinDouble = MachDouble (-1.7976931348623157e+308)
199 targetMaxDouble = MachDouble (1.7976931348623157e+308)
200 targetMinInt = mkMachInt (-2147483648)
201 targetMaxInt = mkMachInt 2147483647
204 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
206 This algorithm for determining the $\log_2$ of exact powers of 2 comes
207 from GCC. It requires bit manipulation primitives, and we use GHC
214 exactLog2 :: Integer -> Maybe Integer
216 = if (x <= 0 || x >= 2147483648) then
219 case (fromInteger x) of { I# x# ->
220 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
223 Just (toInteger (I# (pow2 x#)))
226 shiftr x y = shiftRL# x y
228 pow2 x# | x# ==# 1# = 0#
229 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
232 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
236 #if alpha_TARGET_ARCH
237 = ALWAYS -- For BI (same as BR)
238 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
240 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
241 | LE -- For CMP and BI
242 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
244 | NEVER -- For BI (null instruction)
245 | ULE -- For CMP only
246 | ULT -- For CMP only
249 = ALWAYS -- What's really used? ToDo
263 #if sparc_TARGET_ARCH
264 = ALWAYS -- What's really used? ToDo
285 #if alpha_TARGET_ARCH
288 -- | W -- word (2 bytes): UNUSED
290 -- | L -- longword (4 bytes): UNUSED
291 | Q -- quadword (8 bytes)
292 -- | FF -- VAX F-style floating pt: UNUSED
293 -- | GF -- VAX G-style floating pt: UNUSED
294 -- | DF -- VAX D-style floating pt: UNUSED
295 -- | SF -- IEEE single-precision floating pt: UNUSED
296 | TF -- IEEE double-precision floating pt
300 -- | HB -- higher byte **UNUSED**
303 | F -- IEEE single-precision floating pt
304 | DF -- IEEE single-precision floating pt
305 | F80 -- Intel 80-bit internal FP format; only used for spilling
307 #if sparc_TARGET_ARCH
309 | BU -- byte (unsigned)
310 -- | HW -- halfword, 2 bytes (signed): UNUSED
311 -- | HWU -- halfword, 2 bytes (unsigned): UNUSED
313 -- | D -- doubleword, 8 bytes: UNUSED
314 | F -- IEEE single-precision floating pt
315 | DF -- IEEE single-precision floating pt
318 primRepToSize :: PrimRep -> Size
320 primRepToSize PtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
321 primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
322 primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
323 primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
324 primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
325 primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,)))
326 primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
327 primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
328 primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
329 primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( F ,)))
330 primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,)))
331 primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
332 primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
333 primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
334 primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
335 primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
336 primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
337 -- SUP: Wrong!!! Only for testing the rest of the NCG
338 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
339 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
342 %************************************************************************
344 \subsection{Machine's assembly language}
346 %************************************************************************
348 We have a few common ``instructions'' (nearly all the pseudo-ops) but
349 mostly all of @Instr@ is machine-specific.
353 = COMMENT FAST_STRING -- comment pseudo-op
354 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
355 | LABEL CLabel -- global label pseudo-op
356 | ASCII Bool -- True <=> needs backslash conversion
357 String -- the literal string
360 | DELTA Int -- specify current stack offset for
361 -- benefit of subsequent passes
365 #if alpha_TARGET_ARCH
367 -- data Instr continues...
371 | LD Size Reg MachRegsAddr -- size, dst, src
372 | LDA Reg MachRegsAddr -- dst, src
373 | LDAH Reg MachRegsAddr -- dst, src
374 | LDGP Reg MachRegsAddr -- dst, src
375 | LDI Size Reg Imm -- size, dst, src
376 | ST Size Reg MachRegsAddr -- size, src, dst
381 | ABS Size RI Reg -- size, src, dst
382 | NEG Size Bool RI Reg -- size, overflow, src, dst
383 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
384 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
385 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
386 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
387 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
388 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
389 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
391 -- Simple bit-twiddling.
411 | CMP Cond Reg RI Reg
418 | FADD Size Reg Reg Reg
419 | FDIV Size Reg Reg Reg
420 | FMUL Size Reg Reg Reg
421 | FSUB Size Reg Reg Reg
422 | CVTxy Size Size Reg Reg
423 | FCMP Size Cond Reg Reg Reg
431 | JMP Reg MachRegsAddr Int
433 | JSR Reg MachRegsAddr Int
435 -- Alpha-specific pseudo-ops.
444 #endif {- alpha_TARGET_ARCH -}
447 Intel, in their infinite wisdom, selected a stack model for floating
448 point registers on x86. That might have made sense back in 1979 --
449 nowadays we can see it for the nonsense it really is. A stack model
450 fits poorly with the existing nativeGen infrastructure, which assumes
451 flat integer and FP register sets. Prior to this commit, nativeGen
452 could not generate correct x86 FP code -- to do so would have meant
453 somehow working the register-stack paradigm into the register
454 allocator and spiller, which sounds very difficult.
456 We have decided to cheat, and go for a simple fix which requires no
457 infrastructure modifications, at the expense of generating ropey but
458 correct FP code. All notions of the x86 FP stack and its insns have
459 been removed. Instead, we pretend (to the instruction selector and
460 register allocator) that x86 has six floating point registers, %fake0
461 .. %fake5, which can be used in the usual flat manner. We further
462 claim that x86 has floating point instructions very similar to SPARC
463 and Alpha, that is, a simple 3-operand register-register arrangement.
464 Code generation and register allocation proceed on this basis.
466 When we come to print out the final assembly, our convenient fiction
467 is converted to dismal reality. Each fake instruction is
468 independently converted to a series of real x86 instructions.
469 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
470 arithmetic operations, the two operands are pushed onto the top of the
471 FP stack, the operation done, and the result copied back into the
472 relevant register. There are only six %fake registers because 2 are
473 needed for the translation, and x86 has 8 in total.
475 The translation is inefficient but is simple and it works. A cleverer
476 translation would handle a sequence of insns, simulating the FP stack
477 contents, would not impose a fixed mapping from %fake to %st regs, and
478 hopefully could avoid most of the redundant reg-reg moves of the
481 We might as well make use of whatever unique FP facilities Intel have
482 chosen to bless us with (let's not be churlish, after all).
483 Hence GLDZ and GLD1. Bwahahahahahahaha!
488 -- data Instr continues...
492 | MOV Size Operand Operand
493 | MOVZxL Size Operand Operand -- size is the size of operand 1
494 | MOVSxL Size Operand Operand -- size is the size of operand 1
496 -- Load effective address (also a very useful three-operand add instruction :-)
498 | LEA Size Operand Operand
502 | ADD Size Operand Operand
503 | SUB Size Operand Operand
505 -- Multiplication (signed and unsigned), Division (signed and unsigned),
506 -- result in %eax, %edx.
508 | IMUL Size Operand Operand
511 -- Simple bit-twiddling.
513 | AND Size Operand Operand
514 | OR Size Operand Operand
515 | XOR Size Operand Operand
517 | NEGI Size Operand -- NEG instruction (name clash with Cond)
518 | SHL Size Imm Operand -- Only immediate shifts allowed
519 | SAR Size Imm Operand -- Only immediate shifts allowed
520 | SHR Size Imm Operand -- Only immediate shifts allowed
521 | BT Size Imm Operand
526 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
527 -- as single instructions right up until we spit them out.
529 -- all the 3-operand fake fp insns are src1 src2 dst
530 -- and furthermore are constrained to be fp regs only.
531 -- IMPORTANT: keep is_G_insn up to date with any changes here
532 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
533 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
534 | GST Size Reg MachRegsAddr -- src(fpreg), dst
536 | GLDZ Reg -- dst(fpreg)
537 | GLD1 Reg -- dst(fpreg)
539 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
540 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
542 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
543 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
545 | GITOF Reg Reg -- src(intreg), dst(fpreg)
546 | GITOD Reg Reg -- src(intreg), dst(fpreg)
548 | GADD Size Reg Reg Reg -- src1, src2, dst
549 | GDIV Size Reg Reg Reg -- src1, src2, dst
550 | GSUB Size Reg Reg Reg -- src1, src2, dst
551 | GMUL Size Reg Reg Reg -- src1, src2, dst
553 | GCMP Size Reg Reg -- src1, src2
555 | GABS Size Reg Reg -- src, dst
556 | GNEG Size Reg Reg -- src, dst
557 | GSQRT Size Reg Reg -- src, dst
558 | GSIN Size Reg Reg -- src, dst
559 | GCOS Size Reg Reg -- src, dst
560 | GTAN Size Reg Reg -- src, dst
562 | GFREE -- do ffree on all x86 regs; an ugly hack
565 | TEST Size Operand Operand
566 | CMP Size Operand Operand
578 | JMP Operand -- target
579 | JXX Cond CLabel -- target
584 | CLTD -- sign extend %eax into %edx:%eax
587 = OpReg Reg -- register
588 | OpImm Imm -- immediate value
589 | OpAddr MachRegsAddr -- memory reference
592 i386_insert_ffrees :: [Instr] -> [Instr]
593 i386_insert_ffrees insns
594 | any is_G_instr insns
595 = concatMap ffree_before_nonlocal_transfers insns
599 ffree_before_nonlocal_transfers insn
601 CALL _ -> [GFREE, insn]
602 JMP (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
603 JMP _ -> [GFREE, insn]
607 -- if you ever add a new FP insn to the fake x86 FP insn set,
608 -- you must update this too
609 is_G_instr :: Instr -> Bool
612 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
613 GLDZ _ -> True; GLD1 _ -> True;
614 GFTOD _ _ -> True; GFTOI _ _ -> True;
615 GDTOF _ _ -> True; GDTOI _ _ -> True;
616 GITOF _ _ -> True; GITOD _ _ -> True;
617 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
618 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
619 GCMP _ _ _ -> True; GABS _ _ _ -> True
620 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
621 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
622 GFREE -> panic "is_G_instr: GFREE (!)"
625 #endif {- i386_TARGET_ARCH -}
629 #if sparc_TARGET_ARCH
631 -- data Instr continues...
635 | LD Size MachRegsAddr Reg -- size, src, dst
636 | ST Size Reg MachRegsAddr -- size, src, dst
640 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
641 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
643 -- Simple bit-twiddling.
645 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
646 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
647 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
648 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
649 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
650 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
651 | SLL Reg RI Reg -- src1, src2, dst
652 | SRL Reg RI Reg -- src1, src2, dst
653 | SRA Reg RI Reg -- src1, src2, dst
654 | SETHI Imm Reg -- src, dst
655 | NOP -- Really SETHI 0, %g0, but worth an alias
659 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
660 -- right up until we spit them out.
662 | FABS Size Reg Reg -- src dst
663 | FADD Size Reg Reg Reg -- src1, src2, dst
664 | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
665 | FDIV Size Reg Reg Reg -- src1, src2, dst
666 | FMOV Size Reg Reg -- src, dst
667 | FMUL Size Reg Reg Reg -- src1, src2, dst
668 | FNEG Size Reg Reg -- src, dst
669 | FSQRT Size Reg Reg -- src, dst
670 | FSUB Size Reg Reg Reg -- src1, src2, dst
671 | FxTOy Size Size Reg Reg -- src, dst
675 | BI Cond Bool Imm -- cond, annul?, target
676 | BF Cond Bool Imm -- cond, annul?, target
678 | JMP MachRegsAddr -- target
679 | CALL Imm Int Bool -- target, args, terminal
686 riZero (RIImm (ImmInt 0)) = True
687 riZero (RIImm (ImmInteger 0)) = True
688 riZero (RIReg (FixedReg ILIT(0))) = True
691 #endif {- sparc_TARGET_ARCH -}