1 {-# OPTIONS_GHC -XModalTypes -XScopedTypeVariables -XFlexibleContexts -XMultiParamTypeClasses -ddump-types -XNoMonoPatBinds #-}
6 import GHC.HetMet.CodeTypes hiding ((-))
7 import GHC.HetMet.GArrow
9 -- The best way to understand heterogeneous metaprogramming and
10 -- generalized arrows is to play around with this file, poking at the
11 -- examples until they fail to typecheck -- you'll learn a lot that
14 -- Once you've built the modified compiler, you can compile this file
17 -- $ inplace/bin/ghc-stage2 tutorial.hs
20 -- -XModalTypes adds a new syntactical expression, "code brackets":
21 code_fst = <[ \(x,y) -> x ]>  -- quoted lambda: takes a pair and returns its first component
23 -- This new expression is the introduction form for modal types; code_fst is polymorphic in both pair components and in the language "g":
24 code_fst :: forall a b g. <[ (a,b) -> a ]>@g
26 -- Think of <[T]>@g as being the type of programs written in language
27 -- "g" which, when "executed", return a value of type "T". I mention
28 -- "language g" because the *heterogeneous* aspect of HetMet means
29 -- that we can limit the sorts of constructs allowed inside the code
30 -- brackets, permitting only a subset of Haskell (you have to use
31 -- Haskell syntax, though).
33 -- There is a second new expression form, "~~", called "escape":
35 code_fst_fst = <[ \z -> ~~code_fst (~~code_fst z) ]>  -- splices code_fst in twice: the first component of the first component of z
37 -- Note that ~~ binds more tightly than any other operator. There is
38 -- an alternate version, "~~$", which binds more weakly than any other
39 -- operator (this is really handy sometimes!). To demonstrate this,
40 -- the next two expressions differ only in superficial syntax:
42 example1 foo bar = <[ ~~$ foo bar ]>  -- weak-binding escape: the escaped expression is the whole application (foo bar)
43 example2 foo bar = <[ ~~( foo bar) ]>  -- the same escape, with the application parenthesized explicitly
44 -- example3 foo bar = <[ ~~ foo bar ]>
46 -- ... but the third one is completely different (and in fact, doesn't
47 -- even parse, but we'll get to that in a moment)
49 -- The escape operation must appear within code brackets. In truth,
50 -- it is really a "hole" punched in the code brackets -- the thing to
51 -- which the escape operator gets applied is typed as if it were
52 -- *outside* the code brackets. It must have type <[T]>, and the
53 -- escape operator allows it to be used *inside* code brackets as if
56 -- So, the escape operator is basically a way of pasting code
57 -- fragments into each other.
59 -- This is where those type variables after the "@" sign come in: if
60 -- you paste two pieces of code into a third, all three must be
61 -- written in the same language. We express this by unifying their
64 compose_code :: forall g a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@g
65 compose_code inner outer = <[ \arg -> ~~outer (~~inner arg) ]>  -- apply `inner` first, then `outer`; all three fragments share the language g
67 -- Now, try commenting out the type ascription above and uncommenting
68 -- any of these three:
70 -- compose_code :: forall g h a b c. <[a->b]>@h -> <[b->c]>@g -> <[a->c]>@g
71 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@h -> <[a->c]>@g
72 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@h
75 -- The typechecker won't let you get away with that -- you're trying
76 -- to force a type which is "too polymorphic" onto compose_code. If the
77 -- compiler allowed that, the resulting metaprogram might try to
78 -- splice together programs written in different languages, resulting
81 -- NEW SCOPING RULES: The syntactical depth (or just "depth") of an
82 -- expression is the number of surrounding code-brackets minus the
83 -- number of surrounding escapes (this is strictly a syntax concept
84 -- and has NOTHING to do with the type system!). It is very important
85 -- to keep in mind that the scope of a bound variable extends only to
86 -- expressions at the same depth! To demonstrate, the following
87 -- expression will fail to parse:
89 -- badness = \x -> <[ x ]>
91 -- ...and in the following expression, the occurrence of "x" is bound
92 -- by the first (outer) lambda, not the second one:
94 no_shadowing_here = \x -> <[ \x -> ~~x ]>  -- the escaped x sits at depth 0, so it is the outer lambda's x; the depth-1 binder does not capture it
96 -- Lastly, you can wrap code-brackets around an identifier in a
97 -- top-level, let, or where binding. Notice how GHC doesn't complain
98 -- here about defining an identifier twice!
101 <[ foo ]> = <[ \(x::Bool) -> x ]>  -- binds "foo" at depth one: an identity function whose argument is annotated as Bool
103 -- Now you can use foo (the second one!) inside code-brackets:
105 bar x = <[ foo ~~x ]>  -- applies the depth-one foo to the spliced-in code fragment x
107 bar :: forall g. <[Bool]>@g -> <[Bool]>@g
109 -- In fact, the identifiers have completely unrelated types. Which
110 -- brings up another important point: types are ALWAYS assigned
111 -- "relative to" depth zero. So although we imagine "foo" existing at
112 -- depth-one, its type is quite firmly established as <[ Bool -> Bool ]>
114 -- It has to be this way -- to see why, consider a term which is more
115 -- polymorphic than "foo":
117 <[ foo' ]> = <[ \x -> x ]>  -- like foo, but with no Bool annotation on the argument
121 <[ foo' ]> :: forall a g . <[ a -> a ]>@g  -- the signature is stated relative to depth zero, which is what lets it quantify over g
123 -- ...and there's no way to express the g-polymorphism entirely from
124 -- within the brackets.
126 -- So why does all of this matter? Mainly so that we can continue to use ordinary operators inside code brackets. We'd like
127 -- the "+" operator to work "as expected" -- in other words, we'd like
128 -- people to be able to write things like
130 increment_at_level1 = <[ \x -> x + 1 ]>  -- here (+) and the literal 1 both occur at depth one, inside the code brackets
132 -- However, in unmodified Haskell an identifier like (+) may have only
133 -- one type. In this case that type is:
135 -- (+) :: Num a => a -> a -> a
137 -- Now, we could simply decree that when (+) appears inside code
138 -- brackets, an "implicit ~~" is inserted, so the desugared expression
141 -- increment_at_level1 = <[ \x -> ~~(+) x 1 ]>
143 -- unfortunately this isn't going to work for guest languages that
144 -- don't have higher-order functions. Haskell uses curried arguments
145 -- because it has higher-order functions, but in a first-order guest
146 -- language a more sensible type for (+) would be:
148 -- (+) :: Num a => (a,a) -> a
150 -- ... or even something less polymorphic, like
152 -- (+) :: (Int,Int) -> Int
154 -- so to maintain flexibility, we allow an identifier to have
155 -- different types at different syntactic depths; this way type
156 -- choices made for Haskell don't get imposed on guest languages that
157 -- are missing some of its features.
159 -- In hindsight, what we REALLY want is for increment_at_level1 to
160 -- be desugared like this (much like the Arrow (|...|) syntax):
162 -- increment_at_level1 = <[ \x -> ~~( <[x]> + <[1]> ) ]>
164 -- ... because then we can declare
166 -- instance Num a => Num <[a]> where ...
170 -- instance Num <[Int]> where ...
172 -- unfortunately there's a major problem: knowing how to do this sort
173 -- of desugaring requires knowing the *arity* of a function. For
174 -- symbols we can kludge it by checking Haskell's parsing rules (there
175 -- are only a handful of unary symbols; all others are binary), but
176 -- this is crude and won't work at all for non-symbol identifiers.
177 -- And we can look at a type like x->y->z and say "oh, that's a
178 -- two-argument function", but sometimes GHC doesn't know the complete
179 -- type of an identifier in the midst of unification (i.e. "x has type
180 -- Int->a for some a, where a could be Int or Int->Int"), so guessing
181 -- the arity from the type cannot be done during parsing, which is
182 -- when we need to do this.
184 -- Okay, I think that's more or less a brain dump of why I changed the
185 -- scoping rules and the problems with the other solutions I tried.
187 -- I am very interested in hearing any suggestions on better ways of
188 -- dealing with this, so long as you can still use operators like (+)
189 -- in guest languages without higher-order functions.
197 -- The rest of this file contains a bunch of example programs:
198 -- exponentiation, dot-product, a bunch of classic MetaML idioms, and
199 -- a translation of Nanevski+Pfenning's two-stage regex matcher.
206 --------------------------------------------------------------------------------
207 -- Ye Olde and Most Venerable "pow" Function
212 else <[ \x -> x * ~~(pow (n - 1)) x ]>
215 -- a more efficient two-level pow: given n, generates code computing x^n by repeated squaring (n is halved whenever it is even)
216 pow' 0 = <[ \x -> 1 ]>
217 pow' 1 = <[ \x -> x ]>
218 pow' n = if n `mod` 2==0
219 then <[ \x -> (\y -> y*y) (~~(pow' $ n `shiftR` 1) x) ]>  -- even n: x^n = (x^(n/2))^2, so shift by ONE bit to halve n (shifting by two would quarter it)
220 else <[ \x -> x * ~~(pow' $ n-1) x ]>  -- odd n: peel off a single factor of x
232 --------------------------------------------------------------------------------
235 -- This shows how to build a two-level program one step at a time by
236 -- slowly rearranging it until the brackets can be inserted.
239 -- a one-level function to compute the dot product of two vectors
240 dotproduct :: [Int] -> [Int] -> Int
247 (a*b)+(dotproduct ax bx)
249 -- A slightly modified version of the dot product: note that we
250 -- check for zeroes and ones to avoid multiplying. In a one-level
251 -- program this yields no advantage, however!
252 dotproduct' :: [Int] -> [Int] -> Int
258 (b:bx) -> (dotproduct' ax bx)
261 (b:bx) -> b+(dotproduct' ax bx)
265 (a*b)+(dotproduct' ax bx)
267 -- A two-level version of the dot product. Note how we ask for the first
268 -- vector, then produce a program which is optimized for multiplying
269 -- by that particular vector. If there are zeroes or ones in the
270 -- original vector, we will emit code which is faster than a one-level
273 --dotproduct'' :: forall g.
274 -- GuestLanguageAdd g Int =>
275 -- GuestLanguageMult g Int =>
276 -- GuestLanguageFromInteger g Int =>
277 -- [Int] -> <[ [Int] -> Int ]>@g
281 (0:ax) -> <[ \v2 -> case v2 of
283 (b:bx) -> ~~(dotproduct'' ax) bx ]>
284 (1:ax) -> <[ \v2 -> case v2 of
286 (b:bx) -> b + ~~(dotproduct'' ax) bx ]>
288 (a:ax) -> <[ \v2 -> case v2 of
290 (b:bx) -> ~~(guestIntegerLiteral a) * b + ~~(dotproduct'' ax) bx ]>
297 --------------------------------------------------------------------------------
298 -- Taha-Sheard "isomorphism for code types"
300 back :: forall a b c. (<[b]>@a -> <[c]>@a) -> <[ b->c ]>@a
301 back transform = <[ \arg -> ~~(transform <[arg]>) ]>  -- turns a function between code fragments into code for a function
303 forth :: forall a b c. <[b->c]>@a -> (<[b]>@a -> <[c]>@a)
304 forth fnCode argCode = <[ ~~fnCode ~~argCode ]>  -- turns code for a function into a function between code fragments
308 --------------------------------------------------------------------------------
309 -- Examples of "running" code; these examples illustrate the sorts of
310 -- scoping problems that the Taha-Nielsen environment classifiers look
311 -- for in the context of HOMOGENEOUS metaprogramming. You can't
312 -- actually define these functions for ALL generalized arrows -- only
313 -- those for which you've defined some sort of interpretation in Haskell.
315 run :: forall a. (forall b. <[a]>@b) -> a
318 -- the typechecker correctly rejects this bogosity if you uncomment it:
319 -- bogus = <[ \x -> ~~( run <[ x ]> ) ]>
321 -- The Calcano-Moggi-Taha paper on environment classifier inference
322 -- had a special type for closed code and two special expressions
323 -- "close" and "open". These are unnecessary in SystemFC1 where we
324 -- can use higher-rank polymorphism to get the same result (although
325 -- in truth it's cheating a bit since their type inference is
326 -- decidable with no annotations, whereas Rank-N inference is not):
328 data ClosedCode a = ClosedCode (forall b. <[a]>@b)  -- wraps a code value that is polymorphic in every classifier b (a rank-2 field)
330 open :: forall a b. ClosedCode a -> <[a]>@b
331 open closed = case closed of ClosedCode code -> code  -- unwrap, instantiating the wrapped classifier at the caller's b
333 close :: (forall b. <[a]>@b) -> ClosedCode a
334 close = ClosedCode  -- wrapping is exactly the constructor; the argument must be polymorphic in its classifier
336 run_closed :: ClosedCode a -> a
337 run_closed = undefined  -- placeholder: no interpretation is supplied here, so forcing run_closed fails at runtime
341 --------------------------------------------------------------------------------
342 -- A two-level Regular Expression matcher, adapted from Nanevski+Pfenning, Figure 6
355 -- a continuation-passing-style matcher
357 accept :: Stream s => Regex -> (s -> Bool) -> s -> Bool
362 accept (Plus e1 e2) k s =
363 (accept e1 k s) || (accept e2 k s)
365 accept (Times e1 e2) k s =
366 (accept e1 (accept e2 k)) s
368 accept (Star e) k s =
369 (k s) || (accept e (\s' -> accept (Star e) k s') s)
370 -- FIXME: this will loop forever if you give it (Star x) where x can
371 -- match the empty string
373 accept (Const c) k s =
376 else (s_head s) == c && k (s_tail s)
378 class GuestStream g a where  -- depth-one stream operations over stream type "a" in guest language "g"
379 <[ gs_empty ]> :: <[ a -> Bool ]>@g  -- predicate on a stream (presumably "is it exhausted?" -- confirm against the instances)
380 <[ gs_head ]> :: <[ a -> Char ]>@g  -- yields a Char from a stream
381 <[ gs_tail ]> :: <[ a -> a ]>@g  -- maps a stream to a stream
383 class GuestEqChar g where  -- character equality usable inside code brackets of guest language "g"
384 <[ (==) ]> :: <[ Char -> Char -> Bool ]>@g  -- declares (==) at depth one, at type Char only
390 GuestCharLiteral c =>
391 GuestLanguageBool c =>
396 staged_accept Empty k =
397 <[ \s -> gs_empty s ]>
399 -- note that code for "k" gets duplicated here
400 staged_accept (Plus e1 e2) k =
401 <[ \s -> (~~(staged_accept e1 k) s) || (~~(staged_accept e2 k) s) ]>
403 staged_accept (Times e1 e2) k =
404 <[ \s -> ~~(staged_accept e1 (staged_accept e2 k)) s ]>
406 staged_accept (Star e) k =
409 -- loop :: <[s -> Bool]>@g
410 loop = <[ \s -> ~~k s || ~~(staged_accept e loop) s ]>
411 -- note that loop is not (forall c s. <[s -> Bool]>@c)
412 -- because "k" is free in loop; it is analogous to the free
413 -- environment variable in Nanevski's example
415 staged_accept (Const c) k =
416 <[ \s -> if gs_empty s
418 else (gs_head s) == ~~(guestCharLiteral c) && ~~k (gs_tail s) ]>
420 -- this type won't work unless the case for (Star e) is commented out;
424 -- GuestStream c s =>
425 -- GuestLanguageBool c =>
429 -- GuestStream c s =>
430 -- GuestLanguageBool c =>
436 --------------------------------------------------------------------------------
437 -- An example generalized arrow
439 -- *** this will be finished and posted by 14-Mar-2011; the code
440 -- *** below is just a sketch ***
443 -- A verilog module is an SDoc (chunk of text) giving the module's
444 -- definition. The UniqueSupply avoids name clashes.
447 [VerilogModule] -- dependencies
448 String -> -- module name
449 (Tree String -> -- input port names
450 Tree String -> -- output port names
451 SDoc) -- raw verilog code for the body
454 instance Show VerilogModule where
455 show VerilogModule dep name body =
456 "module "++name++"(FIXME)"++(body FIXME FIXME)
458 data VerilogWrappedType a =
459 { vwt_rep :: String }
461 -- A "verilog garrow" from A to B is, concretely, the source code for a
462 -- verilog module having input ports of type A and output ports of type B;
463 -- the UniqueSupply lets us generate names.
464 data GArrowVerilog a b =
466 VerilogWrappedType a ->
467 VerilogWrappedType b ->
470 instance GArrow GArrowVerilog (,) where
471 ga_id = VerilogModule [] "ga_id" (\ inp outp -> zipTree ... "assign "++outp++" = "++inp)
472 ga_comp f g = VerilogModule [] "ga_comp"
473 ga_first :: g x y -> g (x ** z) (y ** z)
474 ga_second f = ga_comp (ga_comp ga_swap (ga_first f)) ga_swap
475 ga_cancell f = VerilogModule [] "ga_cancell" (\ [in1,in2] [outp] -> "assign "++outp++" = "++in2)
476 ga_cancelr f = VerilogModule [] "ga_cancelr" (\ [in1,in2] [outp] -> "assign "++outp++" = "++in1)
477 ga_uncancell f = VerilogModule [] "ga_uncancell" (\ [in1] [out1,out2] -> "assign "++out1++"=1'b0;\n assign "++out2++"="++in1)  -- module named after this operation; out1 is tied to constant 0, out2 forwards in1
478 ga_uncancelr f = VerilogModule [] "ga_uncancelr" (\ [in1] [out1,out2] -> "assign "++out2++"=1'b0;\n assign "++out1++"="++in1)  -- module named after this operation; out2 is tied to constant 0, out1 forwards in1
480 ga_unassoc :: g (x**(y**z)) ((x**y)**z)
482 instance GArrowDrop GArrowVerilog (,) where
485 instance GArrowCopy GArrowVerilog (,) where
488 instance GArrowSwap GArrowVerilog (,) where
491 instance GArrowLoop GArrowVerilog (,) where
494 instance GArrowLiteral GArrowVerilog (,) where