[PATCH] Replace late reloads Hoopl pass with generalized late assignment pass.
Edward Z. Yang
ezyang at MIT.EDU
Wed Apr 6 18:06:56 CEST 2011
This is a work in progress, see comments for notable infelicities.
But since it's good to get continuous feedback, I've sent a copy
of the patch via mail.
Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
---
compiler/cmm/CmmCPS.hs | 5 +-
compiler/cmm/CmmExpr.hs | 2 +
compiler/cmm/CmmSpillReload.hs | 256 +++++++++++++++++++++++++---------------
compiler/utils/UniqFM.lhs | 6 +-
4 files changed, 171 insertions(+), 98 deletions(-)
diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs
index b9f6db3..d386d9f 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmCPS.hs
@@ -95,9 +95,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
(dualLivenessWithInsertion procPoints) g
-- Insert spills at defns; reloads at return points
g <-
- -- pprTrace "pre insertLateReloads" (ppr g) $
- runOptimization $ insertLateReloads g -- Duplicate reloads just before uses
- dump Opt_D_dump_cmmz "Post late reloads" g
+ runOptimization $ insertLateAssignments g
+ dump Opt_D_dump_cmmz "Post late assignments" g
g <-
-- pprTrace "post insertLateReloads" (ppr g) $
dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination"
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 3ae2996..c1be6ba 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -124,6 +124,8 @@ cmmExprType (CmmReg reg) = cmmRegType reg
cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
cmmExprType (CmmRegOff reg _) = cmmRegType reg
cmmExprType (CmmStackSlot _ _) = bWord -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
cmmLitType :: CmmLit -> CmmType
cmmLitType (CmmInt _ width) = cmmBits width
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 4e2dd38..3cceee1 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -14,9 +14,7 @@ module CmmSpillReload
--, insertSpillsAndReloads --- XXX todo check live-in at entry against formals
, dualLivenessWithInsertion
- , availRegsLattice
- , cmmAvailableReloads
- , insertLateReloads
+ , insertLateAssignments
, removeDeadAssignmentsAndReloads
)
where
@@ -31,6 +29,7 @@ import Control.Monad
import Outputable hiding (empty)
import qualified Outputable as PP
import UniqSet
+import UniqFM
import Compiler.Hoopl
import Data.Maybe
@@ -188,91 +187,6 @@ spill, reload :: LocalReg -> CmmNode O O
spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
-----------------------------------------------------------------
---- sinking reloads
-
--- The idea is to compute at each point the set of registers such that
--- on every path to the point, the register is defined by a Reload
--- instruction. Then, if a use appears at such a point, we can safely
--- insert a Reload right before the use. Finally, we can eliminate
--- the early reloads along with other dead assignments.
-
-data AvailRegs = UniverseMinus RegSet
- | AvailRegs RegSet
-
-
-availRegsLattice :: DataflowLattice AvailRegs
-availRegsLattice = DataflowLattice "register gotten from reloads" empty add
- where empty = UniverseMinus emptyRegSet
- -- | compute in the Tx monad to track whether anything has changed
- add _ (OldFact old) (NewFact new) =
- if join `smallerAvail` old then (SomeChange, join) else (NoChange, old)
- where join = interAvail new old
-
-
-interAvail :: AvailRegs -> AvailRegs -> AvailRegs
-interAvail (UniverseMinus s) (UniverseMinus s') = UniverseMinus (s `plusRegSet` s')
-interAvail (AvailRegs s) (AvailRegs s') = AvailRegs (s `timesRegSet` s')
-interAvail (AvailRegs s) (UniverseMinus s') = AvailRegs (s `minusRegSet` s')
-interAvail (UniverseMinus s) (AvailRegs s') = AvailRegs (s' `minusRegSet` s )
-
-smallerAvail :: AvailRegs -> AvailRegs -> Bool
-smallerAvail (AvailRegs _) (UniverseMinus _) = True
-smallerAvail (UniverseMinus _) (AvailRegs _) = False
-smallerAvail (AvailRegs s) (AvailRegs s') = sizeUniqSet s < sizeUniqSet s'
-smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s'
-
-extendAvail :: AvailRegs -> LocalReg -> AvailRegs
-extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r)
-extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r)
-
-delFromAvail :: AvailRegs -> LocalReg -> AvailRegs
-delFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet s r)
-delFromAvail (AvailRegs s) r = AvailRegs (deleteFromRegSet s r)
-
-elemAvail :: AvailRegs -> LocalReg -> Bool
-elemAvail (UniverseMinus s) r = not $ elemRegSet r s
-elemAvail (AvailRegs s) r = elemRegSet r s
-
-cmmAvailableReloads :: CmmGraph -> FuelUniqSM (BlockEnv AvailRegs)
-cmmAvailableReloads g =
- liftM snd $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
- analFwd availRegsLattice availReloadsTransfer
-
-availReloadsTransfer :: FwdTransfer CmmNode AvailRegs
-availReloadsTransfer = mkFTransfer3 (flip const) middleAvail ((mkFactBase availRegsLattice .) . lastAvail)
-
-middleAvail :: CmmNode O O -> AvailRegs -> AvailRegs
-middleAvail (CmmAssign (CmmLocal r) (CmmLoad l _)) avail
- | l `isStackSlotOf` r = extendAvail avail r
-middleAvail (CmmAssign lhs _) avail = foldRegsDefd delFromAvail avail lhs
-middleAvail (CmmStore l (CmmReg (CmmLocal r))) avail
- | l `isStackSlotOf` r = avail
-middleAvail (CmmStore (CmmStackSlot (RegSlot r) _) _) avail = delFromAvail avail r
-middleAvail (CmmStore {}) avail = avail
-middleAvail (CmmUnsafeForeignCall {}) _ = AvailRegs emptyRegSet
-middleAvail (CmmComment {}) avail = avail
-
-lastAvail :: CmmNode O C -> AvailRegs -> [(Label, AvailRegs)]
-lastAvail (CmmCall _ (Just k) _ _ _) _ = [(k, AvailRegs emptyRegSet)]
-lastAvail (CmmForeignCall {succ=k}) _ = [(k, AvailRegs emptyRegSet)]
-lastAvail l avail = map (\id -> (id, avail)) $ successors l
-
-insertLateReloads :: CmmGraph -> FuelUniqSM CmmGraph
-insertLateReloads g =
- liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
- analRewFwd availRegsLattice availReloadsTransfer rewrites
- where rewrites = mkFRewrite3 first middle last
- first _ _ = return Nothing
- middle m avail = return $ maybe_reload_before avail m (mkMiddle m)
- last l avail = return $ maybe_reload_before avail l (mkLast l)
- maybe_reload_before avail node tail =
- let used = filterRegsUsed (elemAvail avail) node
- in if isEmptyUniqSet used then Nothing
- else Just $ reloadTail used tail
- reloadTail regset t = foldl rel t $ uniqSetToList regset
- where rel t r = mkMiddle (reload r) <*> t
-
removeDeadAssignmentsAndReloads :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
removeDeadAssignmentsAndReloads procPoints g =
liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
@@ -287,6 +201,166 @@ removeDeadAssignmentsAndReloads procPoints g =
nothing _ _ = return Nothing
+----------------------------------------------------------------
+--- sinking arbitrary loads
+
+-- The idea is to compute at each point an expression that could be used
+-- to calculate the contents of a register. Then, if a use appears at
+-- such a point, we can safely insert an assignment for that register
+-- right before the use. Finally, we can eliminate the earlier
+-- assignment along with other dead assignments.
+--
+-- XXX This code currently performs what is essentially unbounded
+-- inlining, which is not necessarily a good idea. We should associate
+-- a cost with all expressions, and only inline if the cost is not too
+-- high.
+--
+-- XXX Interacts poorly with the CmmOpt inliner, which only inlines
+-- if there's a single use of the register. Here, the register is
+-- assigned multiple times, so it won't actually get inlined...
+
+type AssignmentMap = UniqFM (LocalReg, WithTop CmmExpr)
+
+-- ToDo: Move this into somewhere more general (UniqFM? That will
+-- introduce a Hoopl dependency on that file.)
+joinUFM :: JoinFun v -> JoinFun (UniqFM v)
+joinUFM eltJoin l (OldFact old) (NewFact new) = foldUFM_Directly add (NoChange, old) new
+ where add k new_v (ch, joinmap) =
+ case lookupUFM_Directly joinmap k of
+ Nothing -> (SomeChange, addToUFM_Directly joinmap k new_v)
+ Just old_v -> case eltJoin l (OldFact old_v) (NewFact new_v) of
+ (SomeChange, v') -> (SomeChange, addToUFM_Directly joinmap k v')
+ (NoChange, _) -> (ch, joinmap)
+
+assignmentLattice :: DataflowLattice AssignmentMap
+assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
+ where -- ToDo: Factor out 'add', which is useful for any lattice map
+ -- that manually tracks registers
+#if DEBUG
+ add l (OldFact x@(r, old)) (NewFact (r', new))
+ | r /= r' = panic "assignmentLattice: distinct registers in same mapping"
+#else
+ add l (OldFact x@(r, old)) (NewFact (_, new))
+#endif
+ | otherwise =
+ case extendJoinDomain add' l (OldFact old) (NewFact new) of
+ (NoChange, _) -> (NoChange, x)
+ (SomeChange, v) -> (SomeChange, (r, v))
+ add' _ (OldFact old) (NewFact new)
+ | old == new = (NoChange, PElem old)
+ | otherwise = (SomeChange, Top)
+-- ToDo: I feel like there's a monad squirreled away here somewhere...
+
+middleAssignment :: CmmNode O O -> AssignmentMap -> AssignmentMap
+-- Stack slots for registers are treated specially: we maintain
+-- the invariant that the stack slot will always accurately reflect the
+-- contents of a variable. This invariant only holds because we only
+-- generate register slots during the spilling phase, and the spilling
+-- phase always spills when the register changes, so we will never see a
+-- lone 'abc = ...' (where ... is not a reload) without a subsequent
+-- spill. (We could probably write another quick analysis to check that
+-- this invariant holds.) If this invariant holds, then any reload
+-- (that's what an assignment to the register from a stack slot is)
+-- simply adds a register to the available assignments without
+-- invalidating any existing references to the register.
+middleAssignment (CmmAssign (CmmLocal r) e@(CmmLoad l _)) assign
+ | l `isStackSlotOf` r = addToUFM assign r (r, PElem e)
+-- When we assign anything else to a register, we invalidate all current
+-- assignments that contain an assignment to that register, and then, if
+-- it's a local assignment, add this assignment to our map. (Note: We
+-- could do something clever here for simple Hp = Hp + 8 style
+-- assignments by simply aggressively inlining that addition.)
+middleAssignment (CmmAssign reg e) assign
+ = f (mapUFM p assign)
+ where p (r', PElem e') | reg `regUsedIn` e' = (r', Top)
+ p old = old
+ f m | (CmmLocal r) <- reg = addToUFM m r (r, PElem e)
+ | otherwise = m
+-- Once again, treat stores of registers to register slots specially
+middleAssignment (CmmStore l (CmmReg (CmmLocal r))) assign
+ | l `isStackSlotOf` r = assign
+-- When a store occurs, invalidate all current assignments that rely on
+-- the memory location that got clobbered. Note that stack slots have
+-- already been handled.
+middleAssignment (CmmStore lhs rhs) assign
+ = mapUFM p assign
+ where p (r, PElem e) | (lhs, rhs) `clobbers` (r, e) = (r, Top)
+ p old = old
+-- This is tricky, because whether or not an unsafe foreign call is safe
+-- depends on how far along the pipeline we are. Current choice is
+-- conservative.
+middleAssignment (CmmUnsafeForeignCall {}) assign = mapUFM (\(r,_) -> (r,Top)) assign
+middleAssignment (CmmComment {}) assign = assign
+
+-- Assumptions:
+-- * Stack slots do not overlap with any other memory locations
+-- * Non stack-slot stores always conflict with each other. (This is
+-- not always the case; we could probably do something special for Hp)
+-- * Stack slots for different areas do not overlap
+-- * Stack slots within the same area and different offsets may
+-- overlap; we need to do a size check (see 'overlaps').
+clobbers :: (CmmExpr, CmmExpr) -> (LocalReg, CmmExpr) -> Bool
+clobbers (ss at CmmStackSlot{}, CmmReg (CmmLocal r)) (r', CmmLoad (ss'@CmmStackSlot{}) _)
+ | r == r', ss == ss' = False -- No-op on the stack slot
+clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+ = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
+ f (CmmLoad e _) = containsStackSlot e
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+ -- Maybe there's an invariant broken if this actually ever
+ -- returns True
+ containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
+ containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
+ containsStackSlot (CmmStackSlot{}) = True
+ containsStackSlot _ = False
+clobbers _ (_, e) = f e
+ where f (CmmLoad (CmmStackSlot _ _) _) = False
+ f (CmmLoad{}) = True -- conservative
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+
+-- Diagram:
+-- 4 8 12
+-- s -w- o
+-- [ I32 ]
+-- [ F64 ]
+-- s' -w'- o'
+type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+overlaps :: CallSubArea -> CallSubArea -> Bool
+overlaps (a, _, _) (a', _, _) | a /= a' = False
+overlaps (_, o, w) (_, o', w') =
+ let s = o - w
+ s' = o' - w'
+ in (s' <= o) && (s <= o)
+
+lastAssignment :: CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
+-- Also very conservative choices
+lastAssignment (CmmCall _ (Just k) _ _ _) assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment (CmmForeignCall {succ=k}) assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment l assign = map (\id -> (id, assign)) $ successors l
+
+assignmentTransfer :: FwdTransfer CmmNode AssignmentMap
+assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
+
+insertLateAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+insertLateAssignments g =
+ liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot assignmentLattice)] $
+ analRewFwd assignmentLattice assignmentTransfer rewrites
+ where rewrites = mkFRewrite3 first middle last
+ first _ _ = return Nothing
+ middle m assign = return $ maybe_assign_before assign m (mkMiddle m)
+ last l assign = return $ maybe_assign_before assign l (mkLast l)
+ maybe_assign_before assign node tail =
+ let used = foldRegsUsed f emptyUFM node :: UniqFM (LocalReg, CmmExpr)
+ f t r = case lookupUFM assign r of
+ Just (r, (PElem e)) -> addToUFM t r (r, e)
+ _ -> t
+ in if isNullUFM used then Nothing
+ else Just $ lateAssign used tail
+ -- XXX need to do a cost check
+ lateAssign regset t = foldl rel t $ eltsUFM regset
+ where rel t (r, e) = mkMiddle (CmmAssign (CmmLocal r) e) <*> t
---------------------
-- prettyprinting
@@ -305,12 +379,6 @@ instance Outputable DualLive where
if isEmptyUniqSet stack then PP.empty
else (ppr_regs "live on stack =" stack)]
-instance Outputable AvailRegs where
- ppr (UniverseMinus s) = if isEmptyUniqSet s then text "<everything available>"
- else ppr_regs "available = all but" s
- ppr (AvailRegs s) = if isEmptyUniqSet s then text "<nothing available>"
- else ppr_regs "available = " s
-
my_trace :: String -> SDoc -> a -> a
my_trace = if False then pprTrace else \_ _ a -> a
diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs
index 293e48e..63b1724 100644
--- a/compiler/utils/UniqFM.lhs
+++ b/compiler/utils/UniqFM.lhs
@@ -20,6 +20,7 @@ and ``addToUFM\_C'' and ``Data.IntMap.insertWith'' differ in the order
of arguments of combining function.
\begin{code}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# OPTIONS -Wall #-}
module UniqFM (
-- * Unique-keyed mappings
@@ -45,7 +46,7 @@ module UniqFM (
intersectUFM,
intersectUFM_C,
foldUFM, foldUFM_Directly,
- mapUFM,
+ mapUFM, mapUFM_Directly,
elemUFM, elemUFM_Directly,
filterUFM, filterUFM_Directly,
sizeUFM,
@@ -122,6 +123,7 @@ intersectUFM_C :: (elt1 -> elt2 -> elt3)
foldUFM :: (elt -> a -> a) -> a -> UniqFM elt -> a
foldUFM_Directly:: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
mapUFM :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
+mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
filterUFM :: (elt -> Bool) -> UniqFM elt -> UniqFM elt
filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt
@@ -153,6 +155,7 @@ ufmToList :: UniqFM elt -> [(Unique, elt)]
\begin{code}
newtype UniqFM ele = UFM (M.IntMap ele)
+ deriving (Eq)
emptyUFM = UFM M.empty
isNullUFM (UFM m) = M.null m
@@ -188,6 +191,7 @@ intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y)
foldUFM k z (UFM m) = M.fold k z m
foldUFM_Directly k z (UFM m) = M.foldWithKey (k . getUnique) z m
mapUFM f (UFM m) = UFM (M.map f m)
+mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . getUnique) m)
filterUFM p (UFM m) = UFM (M.filter p m)
filterUFM_Directly p (UFM m) = UFM (M.filterWithKey (p . getUnique) m)
--
1.7.4.2
More information about the Cvs-ghc
mailing list