2 % (c) The AQUA Project, Glasgow University, 1993-1998
4 \section[MachMisc]{Description of various machine-specific things}
7 #include "nativeGen/NCG.h"
11 sizeOf, primRepToSize,
15 volatileSaves, volatileRestores,
17 targetMaxDouble, targetMaxInt, targetMinDouble, targetMinInt,
23 Instr(..), IF_ARCH_i386(Operand(..) COMMA,)
26 IF_ARCH_i386(i386_insert_ffrees COMMA,)
38 #include "HsVersions.h"
39 -- #include "config.h"
41 import AbsCSyn ( MagicId(..) )
42 import AbsCUtils ( magicIdPrimRep )
43 import CLabel ( CLabel, isAsmTemp )
44 import Literal ( mkMachInt, Literal(..) )
45 import MachRegs ( stgReg, callerSaves, RegLoc(..),
49 import PrimRep ( PrimRep(..) )
50 import SMRep ( SMRep(..) )
51 import Stix ( StixTree(..), StixReg(..), CodeSegment )
52 import Panic ( panic )
53 import Char ( isDigit )
54 import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) )
55 import Outputable ( text )
56 import IOExts ( trace )
60 underscorePrefix :: Bool -- leading underscore on assembler labels?
62 #ifdef LEADING_UNDERSCORE
63 underscorePrefix = True
65 underscorePrefix = False
68 ---------------------------
69 fmtAsmLbl :: String -> String -- for formatting labels
73 {- The alpha assembler likes temporary labels to look like $L123
74 instead of L123. (Don't toss the L, because then Lf28
83 % ----------------------------------------------------------------
85 We (allegedly) put the first six C-call arguments in registers;
86 where do we start putting the rest of them?
88 eXTRA_STK_ARGS_HERE :: Int
90 = IF_ARCH_alpha(0, IF_ARCH_i386(23{-6x4bytes-}, IF_ARCH_sparc(23,???)))
93 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
95 Size of a @PrimRep@, in bytes.
98 sizeOf :: PrimRep -> Integer{-in bytes-}
99 -- the result is an Integer only because it's more convenient
101 sizeOf pr = case (primRepToSize pr) of
102 IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},)
103 IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
104 IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
107 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
109 Now the volatile saves and restores. We add the basic guys to the
110 list of ``user'' registers provided. Note that there are more basic
111 registers on the restore list, because some are reloaded from
114 (@volatileRestores@ used only for wrapper-hungry PrimOps.)
117 volatileSaves, volatileRestores :: [MagicId] -> [StixTree]
119 save_cands = [BaseReg,Sp,Su,SpLim,Hp,HpLim]
120 restore_cands = save_cands
123 = map save ((filter callerSaves) (save_cands ++ vols))
125 save x = StAssign (magicIdPrimRep x) loc reg
127 reg = StReg (StixMagicId x)
128 loc = case stgReg x of
130 Always _ -> panic "volatileSaves"
132 volatileRestores vols
133 = map restore ((filter callerSaves) (restore_cands ++ vols))
135 restore x = StAssign (magicIdPrimRep x) reg loc
137 reg = StReg (StixMagicId x)
138 loc = case stgReg x of
140 Always _ -> panic "volatileRestores"
143 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
145 Obviously slightly weedy
146 (Note that the floating point values aren't terribly important.)
149 targetMinDouble = MachDouble (-1.7976931348623157e+308)
150 targetMaxDouble = MachDouble (1.7976931348623157e+308)
151 targetMinInt = mkMachInt (-2147483648)
152 targetMaxInt = mkMachInt 2147483647
155 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
157 This algorithm for determining the $\log_2$ of exact powers of 2 comes
158 from GCC. It requires bit manipulation primitives, and we use GHC
165 exactLog2 :: Integer -> Maybe Integer
167 = if (x <= 0 || x >= 2147483648) then
170 case (fromInteger x) of { I# x# ->
171 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
174 Just (toInteger (I# (pow2 x#)))
177 shiftr x y = shiftRL# x y
179 pow2 x# | x# ==# 1# = 0#
180 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftr` 1#))
183 % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 #if alpha_TARGET_ARCH
188 = ALWAYS -- For BI (same as BR)
189 | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
191 | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
192 | LE -- For CMP and BI
193 | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
195 | NEVER -- For BI (null instruction)
196 | ULE -- For CMP only
197 | ULT -- For CMP only
200 = ALWAYS -- What's really used? ToDo
214 #if sparc_TARGET_ARCH
215 = ALWAYS -- What's really used? ToDo
236 #if alpha_TARGET_ARCH
239 -- | W -- word (2 bytes): UNUSED
241 -- | L -- longword (4 bytes): UNUSED
242 | Q -- quadword (8 bytes)
243 -- | FF -- VAX F-style floating pt: UNUSED
244 -- | GF -- VAX G-style floating pt: UNUSED
245 -- | DF -- VAX D-style floating pt: UNUSED
246 -- | SF -- IEEE single-precision floating pt: UNUSED
247 | TF -- IEEE double-precision floating pt
251 -- | HB -- higher byte **UNUSED**
254 | F -- IEEE single-precision floating pt
255 | DF -- IEEE single-precision floating pt
256 | F80 -- Intel 80-bit internal FP format; only used for spilling
258 #if sparc_TARGET_ARCH
260 | BU -- byte (unsigned)
261 -- | HW -- halfword, 2 bytes (signed): UNUSED
262 -- | HWU -- halfword, 2 bytes (unsigned): UNUSED
264 -- | D -- doubleword, 8 bytes: UNUSED
265 | F -- IEEE single-precision floating pt
266 | DF -- IEEE single-precision floating pt
269 primRepToSize :: PrimRep -> Size
271 primRepToSize PtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
272 primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
273 primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
274 primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
275 primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
276 primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,)))
277 primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
278 primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
279 primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
280 primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( F ,)))
281 primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,)))
282 primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
283 primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
284 primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
285 primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
286 primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
287 primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
288 -- SUP: Wrong!!! Only for testing the rest of the NCG
289 primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B
290 primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B
293 %************************************************************************
295 \subsection{Machine's assembly language}
297 %************************************************************************
299 We have a few common ``instructions'' (nearly all the pseudo-ops) but
300 mostly all of @Instr@ is machine-specific.
304 = COMMENT FAST_STRING -- comment pseudo-op
305 | SEGMENT CodeSegment -- {data,text} segment pseudo-op
306 | LABEL CLabel -- global label pseudo-op
307 | ASCII Bool -- True <=> needs backslash conversion
308 String -- the literal string
311 | DELTA Int -- specify current stack offset for
312 -- benefit of subsequent passes
316 #if alpha_TARGET_ARCH
318 -- data Instr continues...
322 | LD Size Reg MachRegsAddr -- size, dst, src
323 | LDA Reg MachRegsAddr -- dst, src
324 | LDAH Reg MachRegsAddr -- dst, src
325 | LDGP Reg MachRegsAddr -- dst, src
326 | LDI Size Reg Imm -- size, dst, src
327 | ST Size Reg MachRegsAddr -- size, src, dst
332 | ABS Size RI Reg -- size, src, dst
333 | NEG Size Bool RI Reg -- size, overflow, src, dst
334 | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
335 | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
336 | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
337 | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
338 | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
339 | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
340 | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
342 -- Simple bit-twiddling.
362 | CMP Cond Reg RI Reg
369 | FADD Size Reg Reg Reg
370 | FDIV Size Reg Reg Reg
371 | FMUL Size Reg Reg Reg
372 | FSUB Size Reg Reg Reg
373 | CVTxy Size Size Reg Reg
374 | FCMP Size Cond Reg Reg Reg
382 | JMP Reg MachRegsAddr Int
384 | JSR Reg MachRegsAddr Int
386 -- Alpha-specific pseudo-ops.
395 #endif {- alpha_TARGET_ARCH -}
398 Intel, in their infinite wisdom, selected a stack model for floating
399 point registers on x86. That might have made sense back in 1979 --
400 nowadays we can see it for the nonsense it really is. A stack model
401 fits poorly with the existing nativeGen infrastructure, which assumes
402 flat integer and FP register sets. Prior to this commit, nativeGen
403 could not generate correct x86 FP code -- to do so would have meant
404 somehow working the register-stack paradigm into the register
405 allocator and spiller, which sounds very difficult.
407 We have decided to cheat, and go for a simple fix which requires no
408 infrastructure modifications, at the expense of generating ropey but
409 correct FP code. All notions of the x86 FP stack and its insns have
410 been removed. Instead, we pretend (to the instruction selector and
411 register allocator) that x86 has six floating point registers, %fake0
412 .. %fake5, which can be used in the usual flat manner. We further
413 claim that x86 has floating point instructions very similar to SPARC
414 and Alpha, that is, a simple 3-operand register-register arrangement.
415 Code generation and register allocation proceed on this basis.
417 When we come to print out the final assembly, our convenient fiction
418 is converted to dismal reality. Each fake instruction is
419 independently converted to a series of real x86 instructions.
420 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
421 arithmetic operations, the two operands are pushed onto the top of the
422 FP stack, the operation done, and the result copied back into the
423 relevant register. There are only six %fake registers because 2 are
424 needed for the translation, and x86 has 8 in total.
426 The translation is inefficient but is simple and it works. A cleverer
427 translation would handle a sequence of insns, simulating the FP stack
428 contents, would not impose a fixed mapping from %fake to %st regs, and
429 hopefully could avoid most of the redundant reg-reg moves of the
432 We might as well make use of whatever unique FP facilities Intel have
433 chosen to bless us with (let's not be churlish, after all).
434 Hence GLDZ and GLD1. Bwahahahahahahaha!
439 -- data Instr continues...
443 | MOV Size Operand Operand
444 | MOVZxL Size Operand Operand -- size is the size of operand 1
445 | MOVSxL Size Operand Operand -- size is the size of operand 1
447 -- Load effective address (also a very useful three-operand add instruction :-)
449 | LEA Size Operand Operand
453 | ADD Size Operand Operand
454 | SUB Size Operand Operand
456 -- Multiplication (signed and unsigned), Division (signed and unsigned),
457 -- result in %eax, %edx.
459 | IMUL Size Operand Operand
462 -- Simple bit-twiddling.
464 | AND Size Operand Operand
465 | OR Size Operand Operand
466 | XOR Size Operand Operand
468 | NEGI Size Operand -- NEG instruction (name clash with Cond)
469 | SHL Size Imm Operand -- Only immediate shifts allowed
470 | SAR Size Imm Operand -- Only immediate shifts allowed
471 | SHR Size Imm Operand -- Only immediate shifts allowed
472 | BT Size Imm Operand
477 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
478 -- as single instructions right up until we spit them out.
480 -- all the 3-operand fake fp insns are src1 src2 dst
481 -- and furthermore are constrained to be fp regs only.
482 -- IMPORTANT: keep is_G_insn up to date with any changes here
483 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
484 | GLD Size MachRegsAddr Reg -- src, dst(fpreg)
485 | GST Size Reg MachRegsAddr -- src(fpreg), dst
487 | GLDZ Reg -- dst(fpreg)
488 | GLD1 Reg -- dst(fpreg)
490 | GFTOD Reg Reg -- src(fpreg), dst(fpreg)
491 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
493 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
494 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
496 | GITOF Reg Reg -- src(intreg), dst(fpreg)
497 | GITOD Reg Reg -- src(intreg), dst(fpreg)
499 | GADD Size Reg Reg Reg -- src1, src2, dst
500 | GDIV Size Reg Reg Reg -- src1, src2, dst
501 | GSUB Size Reg Reg Reg -- src1, src2, dst
502 | GMUL Size Reg Reg Reg -- src1, src2, dst
504 | GCMP Size Reg Reg -- src1, src2
506 | GABS Size Reg Reg -- src, dst
507 | GNEG Size Reg Reg -- src, dst
508 | GSQRT Size Reg Reg -- src, dst
509 | GSIN Size Reg Reg -- src, dst
510 | GCOS Size Reg Reg -- src, dst
511 | GTAN Size Reg Reg -- src, dst
513 | GFREE -- do ffree on all x86 regs; an ugly hack
516 | TEST Size Operand Operand
517 | CMP Size Operand Operand
529 | JMP Operand -- target
530 | JXX Cond CLabel -- target
535 | CLTD -- sign extend %eax into %edx:%eax
538 = OpReg Reg -- register
539 | OpImm Imm -- immediate value
540 | OpAddr MachRegsAddr -- memory reference
543 i386_insert_ffrees :: [Instr] -> [Instr]
544 i386_insert_ffrees insns
545 | any is_G_instr insns
546 = concatMap ffree_before_nonlocal_transfers insns
550 ffree_before_nonlocal_transfers insn
552 CALL _ -> [GFREE, insn]
553 JMP (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
554 JMP _ -> [GFREE, insn]
558 -- if you ever add a new FP insn to the fake x86 FP insn set,
559 -- you must update this too
560 is_G_instr :: Instr -> Bool
563 GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True;
564 GLDZ _ -> True; GLD1 _ -> True;
565 GFTOD _ _ -> True; GFTOI _ _ -> True;
566 GDTOF _ _ -> True; GDTOI _ _ -> True;
567 GITOF _ _ -> True; GITOD _ _ -> True;
568 GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
569 GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
570 GCMP _ _ _ -> True; GABS _ _ _ -> True
571 GNEG _ _ _ -> True; GSQRT _ _ _ -> True
572 GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
573 GFREE -> panic "is_G_instr: GFREE (!)"
576 #endif {- i386_TARGET_ARCH -}
580 #if sparc_TARGET_ARCH
582 -- data Instr continues...
586 | LD Size MachRegsAddr Reg -- size, src, dst
587 | ST Size Reg MachRegsAddr -- size, src, dst
591 | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
592 | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
594 -- Simple bit-twiddling.
596 | AND Bool Reg RI Reg -- cc?, src1, src2, dst
597 | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
598 | OR Bool Reg RI Reg -- cc?, src1, src2, dst
599 | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
600 | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
601 | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
602 | SLL Reg RI Reg -- src1, src2, dst
603 | SRL Reg RI Reg -- src1, src2, dst
604 | SRA Reg RI Reg -- src1, src2, dst
605 | SETHI Imm Reg -- src, dst
606 | NOP -- Really SETHI 0, %g0, but worth an alias
610 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions
611 -- right up until we spit them out.
613 | FABS Size Reg Reg -- src dst
614 | FADD Size Reg Reg Reg -- src1, src2, dst
615 | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
616 | FDIV Size Reg Reg Reg -- src1, src2, dst
617 | FMOV Size Reg Reg -- src, dst
618 | FMUL Size Reg Reg Reg -- src1, src2, dst
619 | FNEG Size Reg Reg -- src, dst
620 | FSQRT Size Reg Reg -- src, dst
621 | FSUB Size Reg Reg Reg -- src1, src2, dst
622 | FxTOy Size Size Reg Reg -- src, dst
626 | BI Cond Bool Imm -- cond, annul?, target
627 | BF Cond Bool Imm -- cond, annul?, target
629 | JMP MachRegsAddr -- target
630 | CALL Imm Int Bool -- target, args, terminal
637 riZero (RIImm (ImmInt 0)) = True
638 riZero (RIImm (ImmInteger 0)) = True
639 riZero (RIReg (RealReg 0)) = True
642 #endif {- sparc_TARGET_ARCH -}