{-# OPTIONS_GHC -XModalTypes -XMultiParamTypeClasses -ddump-types #-} module RegexMatcher where import GHC.HetMet.CodeTypes hiding ((-)) -------------------------------------------------------------------------------- -- -- A one-level Regular Expression matcher, adapted from -- Nanevski+Pfenning's _Staged computation with names and necessity_, -- Figure 5 -- data Regex = Empty | Plus Regex Regex | Times Regex Regex | Star Regex | Const Char class Stream a where s_empty :: a -> Bool s_head :: a -> Char s_tail :: a -> a -- A continuation-passing-style matcher. If the "b" argument is false -- the expression must match at least one character before passing -- control to the continuation (this avoids the equality test in the -- Nanevski+Pfenning code) accept :: Stream s => Regex -> Bool -> -- may only match the empty string if this is True (s -> Bool) -> s -> Bool accept Empty False k s = False accept Empty True k s = k s accept (Plus e1 e2) emptyOk k s = (accept e1 emptyOk k s) || (accept e2 emptyOk k s) accept (Times e1 e2) True k s = (accept e1 True (accept e2 True k)) s accept (Times e1 e2) False k s = (accept e1 False (accept e2 True k)) s || (accept e1 True (accept e2 False k)) s accept (Star e) emptyOk k s = (emptyOk && (k s)) || (accept e emptyOk (\s' -> accept (Star e) False k s') s) accept (Const c) emptyOk k s = if s_empty s then False else (s_head s) == c && k (s_tail s) -------------------------------------------------------------------------------- -- -- A two-level Regular Expression matcher, adapted from -- Nanevski+Pfenning's _Staged computation with names and necessity_, -- Figure 6 -- class GuestStream g a where <[ gs_empty ]> :: <[ a -> Bool ]>@g <[ gs_head ]> :: <[ a -> Char ]>@g <[ gs_tail ]> :: <[ a -> a ]>@g class GuestEqChar g where <[ (==) ]> :: <[ Char -> Char -> Bool ]>@g staged_accept :: Regex -> Bool -> forall c s. GuestStream c s => GuestCharLiteral c => GuestLanguageBool c => GuestEqChar c => <[ s -> Bool ]>@c -> <[ s -> Bool ]>@c staged_accept Empty False k = <[ \s -> false ]> staged_accept Empty True k = <[ \s -> gs_empty s ]> staged_accept (Plus e1 e2) emptyOk k = <[ \s -> let k' = ~~k in (~~(staged_accept e1 emptyOk <[k']>) s) || (~~(staged_accept e2 emptyOk <[k']>) s) ]> staged_accept (Times e1 e2) True k = <[ \s -> ~~(staged_accept e1 True (staged_accept e2 True k)) s ]> staged_accept (Times e1 e2) emptyOk k = <[ \s -> ~~(staged_accept e1 True (staged_accept e2 False k)) s || ~~(staged_accept e1 False (staged_accept e2 True k)) s ]> staged_accept (Star e) emptyOk' k = loop emptyOk' where -- Note that the type of "loop" is NOT (forall c s. <[s -> Bool]>@c) -- because "k" is free here; this restrictionis analogous to the free -- environment variable in Nanevski's example. loop emptyOk = if emptyOk then <[ \s -> ~~k s || ~~(staged_accept e True (loop False)) s ]> else <[ \s -> ~~(staged_accept e False (loop False)) s ]> staged_accept (Const c) emptyOk k = <[ \s -> if gs_empty s then false else (gs_head s) == ~~(guestCharLiteral c) && ~~k (gs_tail s) ]> -- -- Take particular note of the "Plus" case above: note that (following -- Nanevski+Pfenning) the code for "k" is not duplicated -- it is -- escapified into the constructed term only once, and a tiny scrap of -- code containing nothing more than the variable name k' is passed -- to the recursive call. This is in contrast with the naive implementation -- shown below: -- -- staged_accept (Plus e1 e2) emptyOk k = -- <[ \s -> (~~(staged_accept e1 emptyOk k) s) || -- (~~(staged_accept e2 emptyOk k) s) -- ]> -- -- -- The following commented-out type is "too polymorphic" -- try -- uncommenting it to see what happens. It's a great example of the -- kind of thing that environment classifiers guard against: the -- continuation code and the result code get their classifiers -- unified. -- {- staged_accept :: Regex -> Bool -> (forall c s. GuestStream c s => GuestCharLiteral c => GuestLanguageBool c => GuestEqChar c => <[s -> Bool]>@c) -> (forall c s. GuestStream c s => GuestCharLiteral c => GuestLanguageBool c => GuestEqChar c => <[s -> Bool]>@c) -}