X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FcodeGen%2FCgStackery.lhs;h=4b1b414064d237a7a802a65df4243a8493d3db8a;hb=3a49601be4ed68d59ca9a81589e3cb627ae268d7;hp=caf38104dd59915a634a6265e096e585a2eb38a5;hpb=e7498a3ee1d0484d02a9e86633cc179c76ebf36e;p=ghc-hetmet.git diff --git a/ghc/compiler/codeGen/CgStackery.lhs b/ghc/compiler/codeGen/CgStackery.lhs index caf3810..4b1b414 100644 --- a/ghc/compiler/codeGen/CgStackery.lhs +++ b/ghc/compiler/codeGen/CgStackery.lhs @@ -1,5 +1,7 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgStackery.lhs,v 1.24 2003/11/17 14:42:47 simonmar Exp $ % \section[CgStackery]{Stack management functions} @@ -7,26 +9,32 @@ Stack-twiddling operations, which are pretty low-down and grimy. (This is the module that knows all about stack layouts, etc.) \begin{code} -#include "HsVersions.h" - module CgStackery ( - allocAStack, allocBStack, allocAStackTop, allocBStackTop, - allocUpdateFrame, - adjustRealSps, getFinalStackHW, - mkVirtStkOffsets, mkStkAmodes + allocPrimStack, allocStackTop, deAllocStackTop, + adjustStackHW, getFinalStackHW, + setStackFrame, getStackFrame, + mkVirtStkOffsets, mkStkAmodes, + freeStackSlots, dataStackSlots, + updateFrameSize, + constructSlowCall, slowArgs, ) where -IMP_Ubiq(){-uitous-} +#include "HsVersions.h" import CgMonad import AbsCSyn - -import AbsCUtils ( mkAbstractCs, mkAbsCStmts, getAmodeRep ) -import HeapOffs ( VirtualSpAOffset(..), VirtualSpBOffset(..) ) -import PrimRep ( getPrimRepSize, separateByPtrFollowness, - PrimRep(..) - ) -import Util ( mapAccumR, panic ) +import CLabel ( mkRtsApplyInfoLabel, mkRtsApplyEntryLabel ) + +import CgUsages ( getRealSp ) +import AbsCUtils ( mkAbstractCs, getAmodeRep ) +import PrimRep +import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros ) +import Constants +import Util ( sortLt ) +import FastString ( LitString ) +import Panic + +import TRACE ( trace ) \end{code} %************************************************************************ @@ -35,89 +43,121 @@ import Util ( mapAccumR, panic ) %* * %************************************************************************ -@mkVirtStkOffsets@ is given a list of arguments. The first argument -gets the {\em largest} virtual stack offset (remember, virtual offsets +'mkVirtStkOffsets' is given a list of arguments. The first argument +gets the /largest/ virtual stack offset (remember, virtual offsets increase towards the top of stack). \begin{code} -mkVirtStkOffsets :: VirtualSpAOffset -- Offset of the last allocated thing - -> VirtualSpBOffset -- ditto +mkVirtStkOffsets + :: VirtualSpOffset -- Offset of the last allocated thing -> (a -> PrimRep) -- to be able to grab kinds -> [a] -- things to make offsets for - -> (VirtualSpAOffset, -- OUTPUTS: Topmost allocated word - VirtualSpBOffset, -- ditto - [(a, VirtualSpAOffset)], -- boxed things with offsets - [(a, VirtualSpBOffset)]) -- unboxed things with offsets - -mkVirtStkOffsets init_SpA_offset init_SpB_offset kind_fun things - = let (boxeds, unboxeds) - = separateByPtrFollowness kind_fun things - (last_SpA_offset, boxd_w_offsets) - = mapAccumR computeOffset init_SpA_offset boxeds - (last_SpB_offset, ubxd_w_offsets) - = mapAccumR computeOffset init_SpB_offset unboxeds - in - (last_SpA_offset, last_SpB_offset, boxd_w_offsets, ubxd_w_offsets) - where - computeOffset offset thing - = (offset + (max 1 . getPrimRepSize . kind_fun) thing, (thing, offset+(1::Int))) - -- The "max 1" bit is ULTRA important - -- Why? mkVirtStkOffsets is the unique function that lays out function - -- arguments on the stack. The "max 1" ensures that every argument takes - -- at least one stack slot, even if it's of kind VoidKind that actually - -- takes no space at all. - -- This is important to make sure that argument satisfaction checks work - -- properly. Consider - -- f a b s# = (a,b) - -- where s# is a VoidKind. f's argument satisfaction check will check - -- that s# is on the B stack above SuB; but if s# takes zero space, the - -- check will be ARGS_B_CHK(0), which always succeeds. As a result, even - -- if a,b aren't available either, the PAP update won't trigger and - -- we are throughly hosed. (SLPJ 96/05) -\end{code} - -@mkStackAmodes@ is a higher-level version of @mkStackOffsets@. -It starts from the tail-call locations. -It returns a single list of addressing modes for the stack locations, -and therefore is in the monad. + -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + [(a, VirtualSpOffset)]) -- things with offsets -It also adjusts the high water mark if necessary. - -\begin{code} -mkStkAmodes :: VirtualSpAOffset -- Tail call positions - -> VirtualSpBOffset - -> [CAddrMode] -- things to make offsets for - -> FCode (VirtualSpAOffset, -- OUTPUTS: Topmost allocated word - VirtualSpBOffset, -- ditto - AbstractC) -- Assignments to appropriate stk slots - -mkStkAmodes tail_spa tail_spb things - info_down (MkCgState absC binds usage) - = (result, MkCgState absC binds new_usage) +mkVirtStkOffsets init_Sp_offset kind_fun things + = loop init_Sp_offset [] (reverse things) where - result = (last_SpA_offset, last_SpB_offset, mkAbstractCs abs_cs) - - (last_SpA_offset, last_SpB_offset, ptrs_w_offsets, non_ptrs_w_offsets) - = mkVirtStkOffsets tail_spa tail_spb getAmodeRep things - - abs_cs - = [ CAssign (CVal (SpARel realSpA offset) PtrRep) thing - | (thing, offset) <- ptrs_w_offsets - ] - ++ - [ CAssign (CVal (SpBRel realSpB offset) (getAmodeRep thing)) thing - | (thing, offset) <- non_ptrs_w_offsets + loop offset offs [] = (offset,offs) + loop offset offs (t:things) = + let + size = getPrimRepSize (kind_fun t) + thing_slot = offset + size + in + loop thing_slot ((t,thing_slot):offs) things + -- offset of thing is offset+size, because we're growing the stack + -- *downwards* as the offsets increase. + + +-- | 'mkStkAmodes' is a higher-level version of +-- 'mkVirtStkOffsets'. It starts from the tail-call locations. +-- It returns a single list of addressing modes for the stack +-- locations, and therefore is in the monad. It /doesn't/ adjust the +-- high water mark. + +mkStkAmodes + :: VirtualSpOffset -- Tail call positions + -> [CAddrMode] -- things to make offsets for + -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + AbstractC) -- Assignments to appropriate stk slots + +mkStkAmodes tail_Sp things + = getRealSp `thenFC` \ realSp -> + let + (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp getAmodeRep things + + abs_cs = + [ CAssign (CVal (spRel realSp offset) (getAmodeRep thing)) thing + | (thing, offset) <- offsets ] + in + returnFC (last_Sp_offset, mkAbstractCs abs_cs) +\end{code} - ((vspA,fspA,realSpA,hwSpA), (vspB,fspB,realSpB,hwSpB), h_usage) = usage - - new_usage = ((vspA,fspA,realSpA,max last_SpA_offset hwSpA), - (vspB,fspB,realSpB,max last_SpB_offset hwSpB), - h_usage) - -- No need to fiddle with virtual SpA etc because this call is - -- only done just before the end of a block +%************************************************************************ +%* * +\subsection{Pushing the arguments for a slow call} +%* * +%************************************************************************ +For a slow call, we must take a bunch of arguments and intersperse +some stg_ap__ret_info return addresses. +\begin{code} +constructSlowCall :: [CAddrMode] -> (CAddrMode, [CAddrMode]) + -- don't forget the zero case +constructSlowCall [] = (CLbl stg_ap_0 CodePtrRep , []) +constructSlowCall amodes = + -- traceSlowCall amodes $ + (CLbl lbl CodePtrRep, these ++ slowArgs rest) + where (tag, these, rest) = matchSlowPattern amodes + lbl = mkRtsApplyEntryLabel tag + +stg_ap_0 = mkRtsApplyEntryLabel SLIT("0") + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [CAddrMode] -> [CAddrMode] +slowArgs [] = [] +slowArgs amodes = CLbl lbl RetRep : args ++ slowArgs rest + where (tag, args, rest) = matchSlowPattern amodes + lbl = mkRtsApplyInfoLabel tag + +matchSlowPattern :: [CAddrMode] -> (LitString, [CAddrMode], [CAddrMode]) +matchSlowPattern amodes = (tag, these, rest) + where reps = map getAmodeRep amodes + (tag, n) = findMatch (map primRepToArgRep reps) + (these, rest) = splitAt n amodes + +-- These cases were found to cover about 99% of all slow calls: +findMatch (RepP: RepP: RepP: RepP: RepP: RepP: RepP: _) = (SLIT("ppppppp"), 7) +findMatch (RepP: RepP: RepP: RepP: RepP: RepP: _) = (SLIT("pppppp"), 6) +findMatch (RepP: RepP: RepP: RepP: RepP: _) = (SLIT("ppppp"), 5) +findMatch (RepP: RepP: RepP: RepP: _) = (SLIT("pppp"), 4) +findMatch (RepP: RepP: RepP: _) = (SLIT("ppp"), 3) +findMatch (RepP: RepP: RepV: _) = (SLIT("ppv"), 3) +findMatch (RepP: RepP: _) = (SLIT("pp"), 2) +findMatch (RepP: RepV: _) = (SLIT("pv"), 2) +findMatch (RepP: _) = (SLIT("p"), 1) +findMatch (RepV: _) = (SLIT("v"), 1) +findMatch (RepN: _) = (SLIT("n"), 1) +findMatch (RepF: _) = (SLIT("f"), 1) +findMatch (RepD: _) = (SLIT("d"), 1) +findMatch (RepL: _) = (SLIT("l"), 1) +findMatch _ = panic "CgStackery.findMatch" + +#ifdef DEBUG +primRepChar p | isFollowableRep p = 'p' +primRepChar VoidRep = 'v' +primRepChar FloatRep = 'f' +primRepChar DoubleRep = 'd' +primRepChar p | getPrimRepSize p == 1 = 'n' +primRepChar p | is64BitRep p = 'l' + +traceSlowCall amodes and_then + = trace ("call: " ++ map primRepChar (map getAmodeRep amodes)) and_then +#endif \end{code} %************************************************************************ @@ -127,174 +167,163 @@ mkStkAmodes tail_spa tail_spb things %************************************************************************ Allocate a virtual offset for something. -\begin{code} -allocAStack :: FCode VirtualSpAOffset -allocAStack info_down (MkCgState absC binds - ((virt_a, free_a, real_a, hw_a), b_usage, h_usage)) - = (chosen_slot, MkCgState absC binds (new_a_usage, b_usage, h_usage)) - where - push_virt_a = virt_a + 1 - - (chosen_slot, new_a_usage) - = if null free_a then - -- No free slots, so push a new one - -- We need to adjust the high-water mark - (push_virt_a, (push_virt_a, [], real_a, hw_a `max` push_virt_a)) - else - -- Free slots available, so use one - (free_slot, (virt_a, new_free_a, real_a, hw_a)) - - (free_slot, _) = head ([f | f@(slot, st) <- free_a, not (isStubbed st)] ++ free_a) - -- Try to find an un-stubbed location; - -- if none, return the first in the free list - -- We'll only try this if free_a is known to be non-empty - - -- Free list with the free_slot deleted - new_free_a = [ f | f@(s,_) <- free_a, s /= free_slot ] - -allocBStack :: Int -> FCode VirtualSpBOffset -allocBStack size info_down (MkCgState absC binds - (a_usage, (virt_b, free_b, real_b, hw_b), h_usage)) - = (chosen_slot, MkCgState absC binds (a_usage, new_b_usage, h_usage)) - where - push_virt_b = virt_b + size - - (chosen_slot, new_b_usage) - = case find_block free_b of - Nothing -> (virt_b+1, (push_virt_b, free_b, real_b, - hw_b `max` push_virt_b)) - -- Adjust high water mark - - Just slot -> (slot, (virt_b, delete_block free_b slot, real_b, hw_b)) - - -- find_block looks for a contiguous chunk of free slots - find_block :: [VirtualSpBOffset] -> Maybe VirtualSpBOffset - find_block [] = Nothing - find_block (slot:slots) - | take size (slot:slots) == [slot..slot+size-1] - = Just slot - | otherwise - = find_block slots - - delete_block free_b slot = [s | s <- free_b, (s=slot+size)] +\begin{code} +allocPrimStack :: Int -> FCode VirtualSpOffset +allocPrimStack size = do + ((virt_sp, frame, free_stk, real_sp, hw_sp),h_usage) <- getUsage + let push_virt_sp = virt_sp + size + let (chosen_slot, new_stk_usage) = + case find_block free_stk of + Nothing -> (push_virt_sp, + (push_virt_sp, frame, free_stk, real_sp, + hw_sp `max` push_virt_sp)) + -- Adjust high water mark + Just slot -> (slot, + (virt_sp, frame, + delete_block free_stk slot, + real_sp, hw_sp)) + setUsage (new_stk_usage, h_usage) + return chosen_slot + + where + -- find_block looks for a contiguous chunk of free slots + find_block :: [(VirtualSpOffset,Slot)] -> Maybe VirtualSpOffset + find_block [] = Nothing + find_block ((off,free):slots) + | take size ((off,free):slots) == + zip [off..top_slot] (repeat Free) = Just top_slot + | otherwise = find_block slots + -- The stack grows downwards, with increasing virtual offsets. + -- Therefore, the address of a multi-word object is the *highest* + -- virtual offset it occupies (top_slot below). + where top_slot = off+size-1 + + delete_block free_stk slot = [ (s,f) | (s,f) <- free_stk, + (s<=slot-size) || (s>slot) ] -- Retain slots which are not in the range - -- slot..slot+size-1 - --- Allocate a chunk ON TOP OF the stack -allocAStackTop :: Int -> FCode VirtualSpAOffset -allocAStackTop size info_down (MkCgState absC binds - ((virt_a, free_a, real_a, hw_a), b_usage, h_usage)) - = (chosen_slot, MkCgState absC binds (new_a_usage, b_usage, h_usage)) - where - push_virt_a = virt_a + size - chosen_slot = virt_a + 1 - new_a_usage = (push_virt_a, free_a, real_a, hw_a `max` push_virt_a) - -- Adjust high water mark - --- Allocate a chunk ON TOP OF the stack -allocBStackTop :: Int -> FCode VirtualSpBOffset -allocBStackTop size info_down (MkCgState absC binds - (a_usage, (virt_b, free_b, real_b, hw_b), h_usage)) - = (chosen_slot, MkCgState absC binds (a_usage, new_b_usage, h_usage)) - where - push_virt_b = virt_b + size - chosen_slot = virt_b+1 - new_b_usage = (push_virt_b, free_b, real_b, hw_b `max` push_virt_b) - -- Adjust high water mark + -- slot-size+1..slot \end{code} -@allocUpdateFrame@ allocates enough space for an update frame -on the B stack, records the fact in the end-of-block info (in the ``args'' -fields), and passes on the old ``args'' fields to the enclosed code. +Allocate a chunk ON TOP OF the stack. -This is all a bit disgusting. +ToDo: should really register this memory as NonPointer stuff in the +free list. \begin{code} -allocUpdateFrame :: Int -- Size of frame - -> CAddrMode -- Return address which is to be the - -- top word of frame - -> ((VirtualSpAOffset, VirtualSpBOffset, VirtualSpBOffset) -> Code) - -- Scope of update - -> Code - -allocUpdateFrame size update_amode code - (MkCgInfoDown c_info statics (EndOfBlockInfo args_spa args_spb sequel)) - (MkCgState absc binds (a_usage, (vB,rr,qq,hwB),h_usage)) - = case sequel of - - InRetReg -> code (args_spa, args_spb, vB) - (MkCgInfoDown c_info statics new_eob_info) - (MkCgState absc binds new_usage) +allocStackTop :: Int -> FCode VirtualSpOffset +allocStackTop size = do + ((virt_sp, frame, free_stk, real_sp, hw_sp), h_usage) <- getUsage + let push_virt_sp = virt_sp + size + let new_stk_usage = (push_virt_sp, frame, free_stk, real_sp, + hw_sp `max` push_virt_sp) + setUsage (new_stk_usage, h_usage) + return push_virt_sp +\end{code} - other -> panic "allocUpdateFrame" +Pop some words from the current top of stack. This is used for +de-allocating the return address in a case alternative. - where - new_vB = vB + size - new_eob_info = EndOfBlockInfo args_spa new_vB (UpdateCode update_amode) - new_usage = (a_usage, (new_vB,rr,qq,hwB `max` new_vB), h_usage) +\begin{code} +deAllocStackTop :: Int -> FCode VirtualSpOffset +deAllocStackTop size = do + ((virt_sp, frame, free_stk, real_sp, hw_sp), h_usage) <- getUsage + let pop_virt_sp = virt_sp - size + let new_stk_usage = (pop_virt_sp, frame, free_stk, real_sp, hw_sp) + setUsage (new_stk_usage, h_usage) + return pop_virt_sp \end{code} +\begin{code} +adjustStackHW :: VirtualSpOffset -> Code +adjustStackHW offset = do + ((vSp,fTop,fSp,realSp,hwSp), h_usage) <- getUsage + setUsage ((vSp, fTop, fSp, realSp, max offset hwSp), h_usage) +\end{code} A knot-tying beast. \begin{code} -getFinalStackHW :: (VirtualSpAOffset -> VirtualSpBOffset -> Code) -> Code -getFinalStackHW fcode info_down (MkCgState absC binds usages) = state1 - where - state1 = fcode hwSpA hwSpB info_down (MkCgState absC binds usages) - (MkCgState _ _ ((_,_,_, hwSpA), (_,_,_, hwSpB), _)) = state1 +getFinalStackHW :: (VirtualSpOffset -> Code) -> Code +getFinalStackHW fcode = do + fixC (\hwSp -> do + fcode hwSp + ((_,_,_,_, hwSp),_) <- getUsage + return hwSp) + return () +\end{code} + +\begin{code} +setStackFrame :: VirtualSpOffset -> Code +setStackFrame offset = do + ((vSp,_,fSp,realSp,hwSp), h_usage) <- getUsage + setUsage ((vSp, offset, fSp, realSp, hwSp), h_usage) + +getStackFrame :: FCode VirtualSpOffset +getStackFrame = do + ((vSp,frame,fSp,realSp,hwSp), h_usage) <- getUsage + return frame \end{code} +\begin{code} +updateFrameSize | opt_SccProfilingOn = pROF_UF_SIZE + | opt_GranMacros = trace ("updateFrameSize = " ++ (show gRAN_UF_SIZE))gRAN_UF_SIZE + | otherwise = uF_SIZE +\end{code} %************************************************************************ %* * -\subsection[CgStackery-adjust]{Adjusting the stack pointers} +\subsection[CgStackery-free]{Free stack slots} %* * %************************************************************************ -@adjustRealSpX@ generates code to alter the actual stack pointer, and -adjusts the environment accordingly. We are careful to push the -conditional inside the abstract C code to avoid black holes. -ToDo: combine together? - -These functions {\em do not} deal with high-water-mark adjustment. -That's done by functions which allocate stack space. +Explicitly free some stack space. \begin{code} -adjustRealSpA :: VirtualSpAOffset -- New offset for Arg stack ptr - -> Code -adjustRealSpA newRealSpA info_down (MkCgState absC binds - ((vspA,fA,realSpA,hwspA), - b_usage, h_usage)) - = MkCgState (mkAbsCStmts absC move_instrA) binds new_usage - where - move_instrA = if (newRealSpA == realSpA) then AbsCNop - else (CAssign - (CReg SpA) - (CAddr (SpARel realSpA newRealSpA))) - new_usage = ((vspA, fA, newRealSpA, hwspA), - b_usage, h_usage) - -adjustRealSpB :: VirtualSpBOffset -- New offset for Basic/Control stack ptr - -> Code -adjustRealSpB newRealSpB info_down (MkCgState absC binds - (a_usage, - (vspB,fB,realSpB,hwspB), - h_usage)) - = MkCgState (mkAbsCStmts absC move_instrB) binds new_usage - where - move_instrB = if (newRealSpB == realSpB) then AbsCNop - else (CAssign {-PtrRep-} - (CReg SpB) - (CAddr (SpBRel realSpB newRealSpB))) - new_usage = (a_usage, - (vspB, fB, newRealSpB, hwspB), - h_usage) - -adjustRealSps :: VirtualSpAOffset -- New offset for Arg stack ptr - -> VirtualSpBOffset -- Ditto B stack - -> Code -adjustRealSps newRealSpA newRealSpB - = adjustRealSpA newRealSpA `thenC` adjustRealSpB newRealSpB +addFreeStackSlots :: [VirtualSpOffset] -> Slot -> Code +addFreeStackSlots extra_free slot = do + ((vsp, frame,free, real, hw),heap_usage) <- getUsage + let all_free = addFreeSlots free (zip (sortLt (<) extra_free) (repeat slot)) + let (new_vsp, new_free) = trim vsp all_free + let new_usage = ((new_vsp, frame, new_free, real, hw), heap_usage) + setUsage new_usage + +freeStackSlots :: [VirtualSpOffset] -> Code +freeStackSlots slots = addFreeStackSlots slots Free + +dataStackSlots :: [VirtualSpOffset] -> Code +dataStackSlots slots = addFreeStackSlots slots NonPointer + +addFreeSlots :: [(Int,Slot)] -> [(Int,Slot)] -> [(Int,Slot)] +addFreeSlots cs [] = cs +addFreeSlots [] ns = ns +addFreeSlots ((c,s):cs) ((n,s'):ns) + = if c < n then + (c,s) : addFreeSlots cs ((n,s'):ns) + else if c > n then + (n,s') : addFreeSlots ((c,s):cs) ns + else if s /= s' then -- c == n + (c,s') : addFreeSlots cs ns + else + panic ("addFreeSlots: equal slots: " ++ show (c:map fst cs) + ++ show (n:map fst ns)) + +trim :: Int{-offset-} -> [(Int,Slot)] -> (Int{-offset-}, [(Int,Slot)]) +trim current_sp free_slots + = try current_sp free_slots + where + try csp [] = (csp,[]) + + try csp (slot@(off,state):slots) = + if state == Free && null slots' then + if csp' < off then + (csp', []) + else if csp' == off then + (csp'-1, []) + else + (csp',[slot]) + else + (csp', slot:slots') + where + (csp',slots') = try csp slots \end{code}