2 -- | Evaluation of 32 bit values.
3 module SPARC.CodeGen.Gen32 (
10 import SPARC.CodeGen.CondCode
11 import SPARC.CodeGen.Amode
12 import SPARC.CodeGen.Gen64
13 import SPARC.CodeGen.Base
31 -- | The dual to getAnyReg: compute an expression into a register, but
32 -- we don't mind which one it is.
-- The result pairs the register that holds the value with the instruction
-- block that computes it.
-- NOTE(review): the embedded numbering jumps from 33 to 38 and stops at 39,
-- so the equation head and any further case alternatives are not shown in
-- this excerpt -- confirm against the full file.
33 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- Visible alternative: obtain a new register for @rep@ from getNewRegNat and
-- apply @code@ to it, so the generated instructions target that register.
38 tmp <- getNewRegNat rep
39 return (tmp, code tmp)
-- | Generate code that evaluates a 32 bit expression, yielding a 'Register'
--   that describes where the result ends up.
getRegister :: CmmExpr -> NatM Register

-- A Cmm register is returned as a 'Fixed' register with an empty
-- instruction list.
getRegister (CmmReg reg)
  = return (Fixed size (getRegisterReg reg) nilOL)
  where
    size = cmmTypeSize (cmmRegType reg)

-- Register-plus-offset: rewrite the tree with mangleIndexTree and retry.
getRegister expr@(CmmRegOff _ _)
  = getRegister (mangleIndexTree expr)
-- Unsigned narrowing of @x >> 32@: evaluate @x@ as a 64 bit value and hand
-- back the high register that belongs to the returned low register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
  = do
      ChildCode64 code rlo <- iselExpr64 x
      let rhi = getHiVRegFromLo rlo
      return (Fixed II32 rhi code)

-- Signed narrowing of @x >> 32@: handled exactly like the unsigned case.
getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
  = do
      ChildCode64 code rlo <- iselExpr64 x
      let rhi = getHiVRegFromLo rlo
      return (Fixed II32 rhi code)

-- Unsigned narrowing of a 64 bit value: the result is the low register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x])
  = do
      ChildCode64 code rlo <- iselExpr64 x
      return (Fixed II32 rlo code)

-- Signed narrowing of a 64 bit value: also just the low register.
getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x])
  = do
      ChildCode64 code rlo <- iselExpr64 x
      return (Fixed II32 rlo code)
75 -- Load a literal float into a float register.
76 -- The actual literal is stored in a new data area, and we load it
-- NOTE(review): the sentence above is cut off here, and the lines that
-- bind @lbl@, open the instruction list and emit the data area header are
-- absent from this excerpt (the embedded numbering skips them) -- confirm
-- against the full file.
78 getRegister (CmmLit (CmmFloat f W32)) = do
80 -- a label for the new data area
82 tmp <- getNewRegNat II32
88 CmmStaticLit (CmmFloat f W32)],
-- Address the data area in two steps: SETHI places the HI part of the
-- label in tmp, then LD reads from tmp plus the LO part into dst.
91 SETHI (HI (ImmCLbl lbl)) tmp,
92 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-- The destination is left open (Any) and tagged as FF32.
94 return (Any FF32 code)
-- Same scheme for a W64 float literal: the load uses II64 and the result is
-- tagged FF64.
96 getRegister (CmmLit (CmmFloat d W64)) = do
98 tmp <- getNewRegNat II32
102 CmmStaticLit (CmmFloat d W64)],
103 SETHI (HI (ImmCLbl lbl)) tmp,
104 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
105 return (Any FF64 code)
-- Unary machine ops: dispatch on the operator @mop@ applied to the single
-- operand @x@; unrecognised operators end in a panic.
-- NOTE(review): the line after the equation head (presumably the
-- @case mop of@) and the patterns in front of the @| from == to@ guard and
-- the shift-based narrowing below are absent from this excerpt.
109 getRegister (CmmMachOp mop [x])
111 -- Floating point negation -------------------------
112 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
113 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
116 -- Integer negation --------------------------------
-- Negation is emitted as SUB with g0 as first operand, complement as XNOR
-- with g0 (presumably SPARC's always-zero %g0 -- confirm in SPARC.Regs).
117 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
118 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
121 -- Float word size conversion ----------------------
122 MO_FF_Conv W64 W32 -> coerceDbl2Flt x
123 MO_FF_Conv W32 W64 -> coerceFlt2Dbl x
126 -- Float <-> Signed Int conversion -----------------
127 MO_FS_Conv from to -> coerceFP2Int from to x
128 MO_SF_Conv from to -> coerceInt2FP from to x
131 -- Unsigned integer word size conversions ----------
133 -- If it's the same size, then nothing needs to be done.
135 | from == to -> conversionNop (intSize to) x
137 -- To narrow an unsigned word, mask out the high bits to simulate what would
138 -- happen if we copied the value into a smaller register.
139 MO_UU_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
140 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
142 -- for narrowing 32 bit to 16 bit, don't use a literal mask value like the W16->W8
143 -- case because the only way we can load it is via SETHI, which needs 2 ops.
144 -- Do some shifts to chop out the high bits instead.
146 -> do tmpReg <- getNewRegNat II32
147 (xReg, xCode) <- getSomeReg x
-- Shift left by 16, then logical shift right by 16, which clears the top
-- 16 bits of a 32 bit value.
151 [ SLL xReg (RIImm $ ImmInt 16) tmpReg
152 , SRL tmpReg (RIImm $ ImmInt 16) dst]
154 return $ Any II32 code
-- The commented-out line below is the literal-mask alternative that the
-- note above argues against.
156 -- trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))
158 -- To widen an unsigned word we don't have to do anything.
159 -- Just leave it in the same register and mark the result as the new size.
160 MO_UU_Conv W8 W16 -> conversionNop (intSize W16) x
161 MO_UU_Conv W8 W32 -> conversionNop (intSize W32) x
162 MO_UU_Conv W16 W32 -> conversionNop (intSize W32) x
165 -- Signed integer word size conversions ------------
167 -- Mask out high bits when narrowing them
-- Note that, unlike the unsigned W32->W16 case above, the signed W32->W16
-- narrowing does use a literal 65535 mask.
168 MO_SS_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
169 MO_SS_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
170 MO_SS_Conv W32 W16 -> trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))
172 -- Sign extend signed words when widening them.
173 MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
174 MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
175 MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
-- Anything else is unsupported here and aborts with the operator's name.
177 _ -> panic ("Unknown unary mach op: " ++ show mop)
180 -- Binary machine ops
-- Dispatch on the operator @mop@ applied to the two operands @x@ and @y@.
-- NOTE(review): the line after the equation head (presumably the
-- @case mop of@) is absent from this excerpt.
181 getRegister (CmmMachOp mop [x, y])
-- Integer equality and signed comparisons, at any width.
183 MO_Eq _ -> condIntReg EQQ x y
184 MO_Ne _ -> condIntReg NE x y
186 MO_S_Gt _ -> condIntReg GTT x y
187 MO_S_Ge _ -> condIntReg GE x y
188 MO_S_Lt _ -> condIntReg LTT x y
189 MO_S_Le _ -> condIntReg LE x y
-- Unsigned comparisons: only W32 and W16 appear among the visible
-- alternatives; other widths reach the pprPanic at the end.
191 MO_U_Gt W32 -> condIntReg GU x y
192 MO_U_Ge W32 -> condIntReg GEU x y
193 MO_U_Lt W32 -> condIntReg LU x y
194 MO_U_Le W32 -> condIntReg LEU x y
196 MO_U_Gt W16 -> condIntReg GU x y
197 MO_U_Ge W16 -> condIntReg GEU x y
198 MO_U_Lt W16 -> condIntReg LU x y
199 MO_U_Le W16 -> condIntReg LEU x y
-- 32 bit integer arithmetic.
201 MO_Add W32 -> trivialCode W32 (ADD False False) x y
202 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
204 MO_S_MulMayOflo rep -> imulMayOflo rep x y
-- Quotient and remainder: the first Bool passed to idiv/irem is True for
-- the signed operators and False for the unsigned ones.
206 MO_S_Quot W32 -> idiv True False x y
207 MO_U_Quot W32 -> idiv False False x y
209 MO_S_Rem W32 -> irem True x y
210 MO_U_Rem W32 -> irem False x y
-- Floating point comparisons and arithmetic.
212 MO_F_Eq _ -> condFltReg EQQ x y
213 MO_F_Ne _ -> condFltReg NE x y
215 MO_F_Gt _ -> condFltReg GTT x y
216 MO_F_Ge _ -> condFltReg GE x y
217 MO_F_Lt _ -> condFltReg LTT x y
218 MO_F_Le _ -> condFltReg LE x y
220 MO_F_Add w -> trivialFCode w FADD x y
221 MO_F_Sub w -> trivialFCode w FSUB x y
222 MO_F_Mul w -> trivialFCode w FMUL x y
223 MO_F_Quot w -> trivialFCode w FDIV x y
-- Bitwise operations, multiplication and shifts at any width.
225 MO_And rep -> trivialCode rep (AND False) x y
226 MO_Or rep -> trivialCode rep (OR False) x y
227 MO_Xor rep -> trivialCode rep (XOR False) x y
229 MO_Mul rep -> trivialCode rep (SMUL False) x y
231 MO_Shl rep -> trivialCode rep SLL x y
232 MO_U_Shr rep -> trivialCode rep SRL x y
233 MO_S_Shr rep -> trivialCode rep SRA x y
-- Any other binary operator is unsupported and aborts, printing the op.
235 _ -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
-- Memory load: compute the addressing mode for @mem@, then append an LD
-- whose size comes from the loaded type @pk@.
-- NOTE(review): several short lines of the clauses below (a @let@, what
-- looks like a guard on the integer literal, the start of the instruction
-- list for general literals, and the head of the final equation) are absent
-- from this excerpt -- confirm against the full file.
239 getRegister (CmmLoad mem pk) = do
240 Amode src code <- getAmode mem
242 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
243 return (Any (cmmTypeSize pk) code__2)
-- Integer literal: a single OR of g0 with the immediate into dst
-- (g0 is presumably SPARC's always-zero register -- confirm).
245 getRegister (CmmLit (CmmInt i _))
248 src = ImmInt (fromInteger i)
249 code dst = unitOL (OR False g0 (RIImm src) dst)
251 return (Any II32 code)
-- Any other literal: converted with litToImm; the visible instruction ORs
-- the LO part of the immediate into dst, which it also reads.
253 getRegister (CmmLit lit)
254 = let imm = litToImm lit
257 OR False dst (RIImm (LO imm)) dst]
258 in return (Any II32 code)
-- Fallback: panics when no equation matched.
262 = panic "SPARC.CodeGen.Gen32.getRegister: no match"
265 -- | sign extend and widen
-- NOTE(review): the line carrying the function name of this signature and
-- the definition of @bitCount@ (apparently a case ending in the panic
-- below) are absent from this excerpt -- confirm against the full file.
267 :: Width -- ^ width of source expression
268 -> Width -- ^ width of result
269 -> CmmExpr -- ^ source expression
272 integerExtend from to expr
273 = do -- load the expr into some register
274 (reg, e_code) <- getSomeReg expr
275 tmp <- getNewRegNat II32
281 _ -> panic "SPARC.CodeGen.Gen32: no match"
-- The value is shifted left by bitCount into tmp and then arithmetically
-- shifted right by the same amount into dst.
285 -- local shift word left to load the sign bit
286 `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
288 -- arithmetic shift right to sign extend
289 `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
-- The result is tagged with the integer size of the target width.
291 return (Any (intSize to) code)
294 -- | For nop word format conversions we set the resulting value to have the
295 -- required size, but don't need to generate any actual code.
-- NOTE(review): the line carrying the function name of the signature is
-- absent from this excerpt.
298 :: Size -> CmmExpr -> NatM Register
-- Evaluate @expr@ with getRegister and relabel the resulting 'Register'
-- with @new_rep@ via setSizeOfRegister.
300 conversionNop new_rep expr
301 = do e_code <- getRegister expr
302 return (setSizeOfRegister e_code new_rep)
306 -- | Generate an integer division instruction.
-- The visible call sites pass True as the first Bool for signed division
-- and False for unsigned; both pass False as the second Bool.
-- NOTE(review): the equation heads (which would show how the two Bools bind,
-- including @cc@) and the instructions that set the Y register are absent
-- from this excerpt -- confirm against the full file.
307 idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
309 -- For unsigned division with a 32 bit numerator,
310 -- we can just clear the Y register.
313 (a_reg, a_code) <- getSomeReg x
314 (b_reg, b_code) <- getSomeReg y
321 , UDIV cc a_reg (RIReg b_reg) dst]
323 return (Any II32 code)
326 -- For _signed_ division with a 32 bit numerator,
327 -- we have to sign extend the numerator into the Y register.
330 (a_reg, a_code) <- getSomeReg x
331 (b_reg, b_code) <- getSomeReg y
333 tmp <- getNewRegNat II32
-- Two arithmetic right shifts by 16 (32 in total) leave tmp holding copies
-- of the sign bit of a 32 bit a_reg.
339 [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
340 , SRA tmp (RIImm (ImmInt 16)) tmp
343 , SDIV cc a_reg (RIReg b_reg) dst]
345 return (Any II32 code)
348 -- | Do an integer remainder.
350 -- NOTE: The SPARC v8 architecture manual says that integer division
351 -- instructions _may_ generate a remainder, depending on the implementation.
352 -- If so it is _recommended_ that the remainder is placed in the Y register.
354 -- The UltraSparc 2007 manual says Y is _undefined_ after division.
356 -- The SPARC T2 doesn't store the remainder, not sure about the others.
357 -- It's probably best not to worry about it, and just generate our own
-- NOTE(review): the sentence above is cut off in this excerpt, as are the
-- equation heads and the instructions that set the Y register.
-- The visible call sites pass True for signed and False for unsigned
-- remainder.
360 irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
362 -- For unsigned operands:
363 -- Division is between a 64 bit numerator and a 32 bit denominator,
364 -- so we still have to clear the Y register.
367 (a_reg, a_code) <- getSomeReg x
368 (b_reg, b_code) <- getSomeReg y
370 tmp_reg <- getNewRegNat II32
-- remainder = a - (a / b) * b: divide, multiply the quotient back by b,
-- then subtract the product from a.
377 , UDIV False a_reg (RIReg b_reg) tmp_reg
378 , UMUL False tmp_reg (RIReg b_reg) tmp_reg
379 , SUB False False a_reg (RIReg tmp_reg) dst]
381 return (Any II32 code)
385 -- For signed operands:
386 -- Make sure to sign extend into the Y register, or the remainder
387 -- will have the wrong sign when the numerator is negative.
389 -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
390 -- not the full 32. Not sure why this is, something to do with overflow?
391 -- If anyone cares enough about the speed of signed remainder they
392 -- can work it out themselves (then tell me). -- BL 2009/01/20
395 (a_reg, a_code) <- getSomeReg x
396 (b_reg, b_code) <- getSomeReg y
398 tmp1_reg <- getNewRegNat II32
399 tmp2_reg <- getNewRegNat II32
-- tmp1_reg receives the sign of a_reg (two arithmetic shifts by 16);
-- tmp2_reg is used for the quotient and then the product.
405 [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
406 , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
-- Same divide / multiply back / subtract scheme, with the signed
-- instructions.
409 , SDIV False a_reg (RIReg b_reg) tmp2_reg
410 , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
411 , SUB False False a_reg (RIReg tmp2_reg) dst]
413 return (Any II32 code)
-- Code for the MO_S_MulMayOflo machine op, which is the only visible
-- caller: it evaluates both operands and combines the product as commented
-- below.
-- NOTE(review): the equation head, the non-panicking alternatives of
-- @shift_amt@, and the instruction that fills @res_hi@ are absent from this
-- excerpt -- confirm against the full file.
416 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
419 (a_reg, a_code) <- getSomeReg a
420 (b_reg, b_code) <- getSomeReg b
421 res_lo <- getNewRegNat II32
422 res_hi <- getNewRegNat II32
-- The shift amount depends on the operand width; unsupported widths panic.
424 let shift_amt = case rep of
427 _ -> panic "shift_amt"
429 let code dst = a_code `appOL` b_code `appOL`
-- Multiply into res_lo, arithmetically shift res_lo right by shift_amt,
-- then subtract res_hi from it to form the result in dst.
431 SMUL False a_reg (RIReg b_reg) res_lo,
433 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
434 SUB False False res_lo (RIReg res_hi) dst
436 return (Any II32 code)
439 -- -----------------------------------------------------------------------------
440 -- 'trivial*Code': deal with trivial instructions
442 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
443 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
444 -- Only look for constants on the right hand side, because that's
445 -- where the generic optimizer will have put them.
447 -- Similarly, for unary instructions, we don't have to worry about
448 -- matching an StInt as the argument, because genericOpt will already
449 -- have handled the constant-folding.
-- Dyadic integer instruction: @instr@ takes a source register, a
-- register-or-immediate second operand and a destination register.
-- NOTE(review): the rest of this signature and the lines between the first
-- equation head and its body (possibly a guard on the literal) are absent
-- from this excerpt -- confirm against the full file.
453 -> (Reg -> RI -> Reg -> Instr)
-- Integer literal on the right: pass it to the instruction as an immediate.
-- The first argument is ignored in both equations; the result is always
-- tagged II32.
458 trivialCode _ instr x (CmmLit (CmmInt y _))
461 (src1, code) <- getSomeReg x
463 src2 = ImmInt (fromInteger y)
464 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
465 return (Any II32 code__2)
-- General case: evaluate both operands into registers (x first, then y)
-- and append the instruction.
468 trivialCode _ instr x y = do
469 (src1, code1) <- getSomeReg x
470 (src2, code2) <- getSomeReg y
472 code__2 dst = code1 `appOL` code2 `snocOL`
473 instr src1 (RIReg src2) dst
474 return (Any II32 code__2)
-- Dyadic floating point instruction: @instr@ takes a size, two source
-- registers and a destination register.
-- NOTE(review): the rest of this signature, the bindings of @pk1@ and @pk2@
-- (presumably the Cmm types of x and y), an @else@, and the end of the final
-- @return@ are absent from this excerpt -- confirm against the full file.
479 -> (Size -> Reg -> Reg -> Reg -> Instr)
484 trivialFCode pk instr x y = do
485 (src1, code1) <- getSomeReg x
486 (src2, code2) <- getSomeReg y
487 tmp <- getNewRegNat FF64
-- promote converts an FF32 register to FF64, writing into tmp; its
-- parameter shadows the function argument x.
489 promote x = FxTOy FF32 FF64 x tmp
-- Same operand types: emit the instruction at the size given by pk.
495 if pk1 `cmmEqType` pk2 then
496 code1 `appOL` code2 `snocOL`
497 instr (floatSize pk) src1 src2 dst
-- Mixed types with a W32 first operand: promote it and operate at FF64.
498 else if typeWidth pk1 == W32 then
499 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
500 instr FF64 tmp src2 dst
-- Otherwise promote the second operand and operate at FF64.
502 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
503 instr FF64 src1 tmp dst
-- Result size: the common type when they agree, otherwise a W64 float.
504 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- Unary integer instruction: @instr@ takes a register-or-immediate source
-- and a destination register.
-- NOTE(review): the rest of this signature and the @let@ line are absent
-- from this excerpt.
511 -> (RI -> Reg -> Instr)
-- Evaluate @x@ into a register, append the instruction with that register
-- as source, and tag the result with @size@.
515 trivialUCode size instr x = do
516 (src, code) <- getSomeReg x
518 code__2 dst = code `snocOL` instr (RIReg src) dst
519 return (Any size code__2)
-- Unary floating point instruction: @instr@ takes a source and a
-- destination register.
-- NOTE(review): the rest of this signature and the @let@ line are absent
-- from this excerpt.
524 -> (Reg -> Reg -> Instr)
-- Evaluate @x@ into a register, append the instruction, and tag the result
-- with @pk@.
528 trivialUFCode pk instr x = do
529 (src, code) <- getSomeReg x
531 code__2 dst = code `snocOL` instr src dst
532 return (Any pk code__2)
537 -- Coercions -------------------------------------------------------------------
539 -- | Coerce an integer value to floating point
-- @width1@ is the integer source width, @width2@ the float result width.
540 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
541 coerceInt2FP width1 width2 x = do
542 (src, code) <- getSomeReg x
-- Store the integer register to the slot at spRel (-2), load that slot back
-- into dst, then convert dst in place from the integer to the float format.
544 code__2 dst = code `appOL` toOL [
545 ST (intSize width1) src (spRel (-2)),
546 LD (intSize width1) (spRel (-2)) dst,
547 FxTOy (intSize width1) (floatSize width2) dst dst]
548 return (Any (floatSize $ width2) code__2)
552 -- | Coerce a floating point value to integer
554 -- NOTE: On sparc v9 there are no instructions to move a value from an
555 -- FP register directly to an int register, so we have to use a load/store.
-- @width1@ is the float source width, @width2@ the integer result width.
-- NOTE(review): the lines that open the instruction list are absent from
-- this excerpt.
557 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
558 coerceFP2Int width1 width2 x
559 = do let fsize1 = floatSize width1
560 fsize2 = floatSize width2
562 isize2 = intSize width2
564 (fsrc, code) <- getSomeReg x
-- fdst is a new register of the float size for the target width; it holds
-- the converted value before the trip through memory.
565 fdst <- getNewRegNat fsize2
570 -- convert float to int format, leaving it in a float reg.
571 [ FxTOy fsize1 isize2 fsrc fdst
573 -- store the int into mem, then load it back to move
574 -- it into an actual int reg.
-- The store uses the float size fsize2, the load the integer size isize2;
-- both address the same slot, spRel (-2).
575 , ST fsize2 fdst (spRel (-2))
576 , LD isize2 (spRel (-2)) dst]
578 return (Any isize2 code2)
581 -- | Coerce a double precision floating point value to single precision.
-- NOTE(review): the equation head is absent from this excerpt.
582 coerceDbl2Flt :: CmmExpr -> NatM Register
-- Evaluate the operand, then append one FF64 -> FF32 conversion into dst.
584 (src, code) <- getSomeReg x
585 return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
588 -- | Coerce a single precision floating point value to double precision
-- NOTE(review): the equation head is absent from this excerpt.
589 coerceFlt2Dbl :: CmmExpr -> NatM Register
-- Evaluate the operand, then append one FF32 -> FF64 conversion into dst.
591 (src, code) <- getSomeReg x
592 return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
597 -- Condition Codes -------------------------------------------------------------
599 -- Evaluate a comparison, and get the result into a register.
601 -- Do not fill the delay slots here; you will confuse the register allocator.
-- NOTE(review): the @let@ lines and, in the general case, the lines between
-- the instructions shown (presumably block labels for bid1/bid2) are absent
-- from this excerpt. The two Bool flags on SUB/ADD presumably select the
-- with-carry and set-condition-codes variants -- confirm in SPARC.Instr.
603 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-- Equality against the literal zero: one operand evaluation, two
-- instructions, no branches.
604 condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do
605 (src, code) <- getSomeReg x
607 code__2 dst = code `appOL` toOL [
608 SUB False True g0 (RIReg src) g0,
609 SUB True False g0 (RIImm (ImmInt (-1))) dst]
610 return (Any II32 code__2)
-- General equality: XOR the operands into dst first, then apply the same
-- two-instruction sequence to dst.
612 condIntReg EQQ x y = do
613 (src1, code1) <- getSomeReg x
614 (src2, code2) <- getSomeReg y
616 code__2 dst = code1 `appOL` code2 `appOL` toOL [
617 XOR False src1 (RIReg src2) dst,
618 SUB False True g0 (RIReg dst) g0,
619 SUB True False g0 (RIImm (ImmInt (-1))) dst]
620 return (Any II32 code__2)
-- Inequality against the literal zero: as above, but finishing with an ADD
-- of immediate 0 instead of a SUB of immediate -1.
622 condIntReg NE x (CmmLit (CmmInt 0 _)) = do
623 (src, code) <- getSomeReg x
625 code__2 dst = code `appOL` toOL [
626 SUB False True g0 (RIReg src) g0,
627 ADD True False g0 (RIImm (ImmInt 0)) dst]
628 return (Any II32 code__2)
-- General inequality: XOR the operands, then the same sequence on dst.
630 condIntReg NE x y = do
631 (src1, code1) <- getSomeReg x
632 (src2, code2) <- getSomeReg y
634 code__2 dst = code1 `appOL` code2 `appOL` toOL [
635 XOR False src1 (RIReg src2) dst,
636 SUB False True g0 (RIReg dst) g0,
637 ADD True False g0 (RIImm (ImmInt 0)) dst]
638 return (Any II32 code__2)
-- Every other condition: compute the condition codes with condIntCode and
-- branch between two fresh blocks.
640 condIntReg cond x y = do
641 bid1@(BlockId _) <- getBlockIdNat
642 bid2@(BlockId _) <- getBlockIdNat
643 CondCode _ cond cond_code <- condIntCode cond x y
-- Branch to bid1 when the condition holds; the fall-through path writes 0
-- to dst and jumps to bid2. The later OR writes 1 to dst. Each branch is
-- followed by an explicit NOP.
645 code__2 dst = cond_code `appOL` toOL [
646 BI cond False bid1, NOP,
647 OR False g0 (RIImm (ImmInt 0)) dst,
648 BI ALWAYS False bid2, NOP,
650 OR False g0 (RIImm (ImmInt 1)) dst,
652 return (Any II32 code__2)
-- Evaluate a floating point comparison and get the result (0 or 1) into a
-- register, by computing the condition codes with condFltCode and branching
-- between two fresh blocks.
-- NOTE(review): the @let@ line and the lines between the instructions shown
-- (presumably block labels for bid1/bid2) are absent from this excerpt.
655 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
656 condFltReg cond x y = do
657 bid1@(BlockId _) <- getBlockIdNat
658 bid2@(BlockId _) <- getBlockIdNat
660 CondCode _ cond cond_code <- condFltCode cond x y
-- The conditional branch is the floating point BF; the unconditional jump
-- to bid2 is an integer BI ALWAYS. Each branch is followed by a NOP.
662 code__2 dst = cond_code `appOL` toOL [
664 BF cond False bid1, NOP,
665 OR False g0 (RIImm (ImmInt 0)) dst,
666 BI ALWAYS False bid2, NOP,
668 OR False g0 (RIImm (ImmInt 1)) dst,
670 return (Any II32 code__2)