- allocCallSlot areaMap (Block id stackInfo t)
- | elemBlockSet id procPoints =
- let young = youngest_live areaMap $ live_in t
- start = case returnOff stackInfo of Just b -> max b young
- Nothing -> young
- z = allocSlotFrom ig areaSize start areaMap (CallArea (Young id))
- in pprTrace "allocCallSlot for" (ppr id <+> ppr young <+> ppr (live_in t) <+> ppr z) z
- allocCallSlot areaMap _ = areaMap
- -- mid foreign calls need to have info tables placed on the stack
- allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap =
- let young = youngest_live areaMap $ removeLiveSlotDefs (live_in t) m
- area = CallArea (Young bid)
- areaSize' = addToFM areaSize area (widthInBytes (typeWidth gcWord))
- in allocSlotFrom ig areaSize' young areaMap area
- allocMidCall _ _ areaMap = areaMap
- alloc m t areaMap =
- foldSlotsDefd alloc' (foldSlotsUsed alloc' (allocMidCall m t areaMap) m) m
- where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
- alloc' areaMap _ = areaMap
- layoutAreas areaMap b@(Block _ _ t) = layout areaMap t
- where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t
- layout areaMap (ZLast _) = allocCallSlot areaMap b
- areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) (postorder_dfs g)
- in pprTrace "ProcPoints" (ppr procPoints) $
- pprTrace "Area SizeMap" (ppr areaSize) $
- pprTrace "Entry SP" (ppr entrySp) $
- pprTrace "Area Map" (ppr areaMap) $ areaMap
+
+ -- Update the successor's incoming SP.
+ setSuccSPs inSp bid areaMap =
+ case (Map.lookup area areaMap , mapLookup bid (toBlockMap g)) of
+ (Just _, _) -> areaMap -- succ already knows incoming SP
+ (Nothing, Just _) ->
+ if setMember bid procPoints then
+ let young = youngest_live areaMap $ env' bid
+ -- start = case returnOff stackInfo of Just b -> max b young
+ -- Nothing -> young
+ start = young -- maybe wrong, but I don't understand
+ -- why the preceding is necessary...
+ in allocSlotFrom ig areaSize start areaMap area
+ else Map.insert area inSp areaMap
+ (_, Nothing) -> panic "Block not found in cfg"
+ where area = CallArea (Young bid)
+
+ layoutAreas areaMap block = foldBlockNodesF3 (flip const, allocMid, allocLast (entryLabel block)) block areaMap
+ allocMid m areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap m) m
+ allocLast bid l areaMap =
+ foldr (setSuccSPs inSp) areaMap' (successors l)
+ where inSp = slot + spOffset -- [Procpoint Sp offset]
+ -- If it's not in the map, we should use our previous
+ -- calculation unchanged.
+ spOffset = mapLookup bid spEntryMap `orElse` 0
+ slot = expectJust "slot in" $ Map.lookup (CallArea (Young bid)) areaMap
+ areaMap' = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap l) l
+ alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
+ alloc' areaMap _ = areaMap
+
+ initMap = Map.insert (CallArea (Young (g_entry g))) 0
+ . Map.insert (CallArea Old) 0
+ $ Map.empty
+
+ areaMap = foldl layoutAreas initMap (postorderDfs g)
+ in -- pprTrace "ProcPoints" (ppr procPoints) $
+ -- pprTrace "Area SizeMap" (ppr areaSize) $
+ -- pprTrace "Entry offset" (ppr entry_off) $
+ -- pprTrace "Area Map" (ppr areaMap) $
+ areaMap
+
+{- Note [Procpoint Sp offset]
+
+The calculation of inSp is a little tricky. (Un)fortunately, if you get
+it wrong, you will get inefficient but correct code. You know you've
+got it wrong if the generated stack pointer bounces up and down for no
+good reason.
+
+Why can't we just set inSp to the location of the slot? (This is what
+the code used to do.) The trouble is when we actually hit the proc
+point the start of the slot will not be the same as the actual Sp due
+to argument passing:
+
+ a:
+ I32[(young<b> + 4)] = cde;
+ // Stack pointer is moved to young end (bottom) of young<b> for call
+ // +-------+
+ // | arg 1 |
+ // +-------+ <- Sp
+ call (I32[foobar::I32])(...) returns to Just b (4) (4) with update frame 4;
+ b:
+ // After call, stack pointer is above the old end (top) of
+ // young<b> (the difference is spOffset)
+ // +-------+ <- Sp
+ // | arg 1 |
+ // +-------+
+
+If we blithely set the Sp to be the same as the slot (the young end of
+young<b>), an adjustment will be necessary when we go to the next block.
+This is wasteful. So, instead, for the next block after a procpoint,
+the actual Sp should be set to the same as the true Sp when we just
+entered the procpoint. Then manifestSP will automatically do the right
+thing.
+
+Questions you may ask:
+
+1. Why don't we need to change the mapping for the procpoint itself?
+ Because manifestSP does its own calculation of the true stack value,
+ manifestSP will notice the discrepancy between the actual stack
+ pointer and the slot start, and adjust all of its memory accesses
+ accordingly. So the only problem is when we adjust the Sp in
+ preparation for the successor block; that's why this code is here and
+ not in setSuccSPs.
+
+2. Why don't we make the procpoint call area and the true offset match
+ up? If we did that, we would never use memory above the true value
+ of the stack pointer, thus wasting all of the stack we used to store
+ arguments. You might think that some clever changes to the slot
+ offsets, using negative offsets, might fix it, but this does not make
+ semantic sense.
+
+3. If manifestSP is already calculating the true stack value, why we can't
+ do this trick inside manifestSP itself? The reason is that if two
+ branches join with inconsistent SPs, one of them has to be fixed: we
+ can't know what the fix should be without already knowing what the
+ chosen location of SP is on the next successor. (This is
+ the "succ already knows incoming SP" case), This calculation cannot
+ be easily done in manifestSP, since it processes the nodes
+ /backwards/. So we need to have figured this out before we hit
+ manifestSP.
+-}