examples/tutorial.hs

   1 {-# OPTIONS_GHC -XModalTypes -XScopedTypeVariables -XFlexibleContexts -XMultiParamTypeClasses -ddump-types -XNoMonoPatBinds #-}
   2 module GArrowsTutorial
   3 where
   4 import Data.Bits
   5 import Data.Bool (not)
   6 import GHC.HetMet.CodeTypes hiding ((-))
   7 import GHC.HetMet.GArrow
   8
   9 -- The best way to understand heterogeneous metaprogramming and
  10 -- generalized arrows is to play around with this file, poking at the
  11 -- examples until they fail to typecheck -- you'll learn a lot that
  12 -- way!
  13
  14 -- Once you've built the modified compiler, you can compile this file
  15 -- with:
  16 --
  17 --    $ inplace/bin/ghc-stage2 tutorial.hs
  18 --
  19
  20 -- -XModalTypes adds a new syntactical expression, "code brackets":
  21 code_fst = <[ \(x,y) -> x ]>
  22
  23 -- This new expression is the introduction form for modal types:
  24 code_fst :: forall a b g. <[ (a,b) -> a ]>@g
  25
  26 -- Think of <[T]>@g as being the type of programs written in language
  27 -- "g" which, when "executed", return a value of type "T".  I mention
  28 -- "language g" because the *heterogeneous* aspect of HetMet means
  29 -- that we can limit the sorts of constructs allowed inside the code
  30 -- brackets, permitting only a subset of Haskell (you have to use
  31 -- Haskell syntax, though).
  32
  33 -- There is a second new expression form, "~~", called "escape":
  34
  35 code_fst_fst = <[ \z -> ~~code_fst (~~code_fst z) ]>
  36
  37 -- Note that ~~ binds more tightly than any other operator.  There is
  38 -- an alternate version, "~~$", which binds more weakly than any other
  39 -- operator (this is really handy sometimes!).  To demonstrate this,
  40 -- the next two expressions differ only in superficial syntax:
  41
  42 example1    foo bar = <[ ~~$ foo bar  ]>
  43 example2    foo bar = <[ ~~( foo bar) ]>
  44 -- example3 foo bar = <[ ~~  foo bar  ]>
  45
  46 -- ... but the third one is completely different (and in fact, doesn't
  47 -- even parse, but we'll get to that in a moment)
  48
  49 -- The escape operation must appear within code brackets.  In truth,
  50 -- it is really a "hole" punched in the code brackets -- the thing to
  51 -- which the escape operator gets applied is typed as if it were
  52 -- *outside* the code brackets.  It must have type <[T]>, and the
  53 -- escape operator allows it to be used *inside* code brackets as if
  54 -- it had type "T".
  55
  56 -- So, the escape operator is basically a way of pasting code
  57 -- fragments into each other.
  58
  59 -- This is where those type variables after the "@" sign come in: if
  60 -- you paste two pieces of code into a third, all three must be
  61 -- written in the same language.  We express this by unifying their
  62 -- tyvars:
  63
  64 compose_code :: forall g a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@g
  65 compose_code x y = <[ \z -> ~~y (~~x z) ]>
  66
  67 -- Now, try commenting out the type ascription above and uncommenting
  68 -- any of these three:
  69 --
  70 -- compose_code :: forall g h a b c. <[a->b]>@h -> <[b->c]>@g -> <[a->c]>@g
  71 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@h -> <[a->c]>@g
  72 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@h
  73 --
  74
  75 -- The typechecker won't let you get away with that -- you're trying
  76 -- to force a type which is "too polymorphic" onto paste2.  If the
  77 -- compiler allowed that, the resulting metaprogram might try to
  78 -- splice together programs written in different languages, resulting
  79 -- in mayhem.
  80
  81 -- NEW SCOPING RULES: The syntactical depth (or just "depth") of an
  82 -- expression is the number of surrounding code-brackets minus the
  83 -- number of surrounding escapes (this is strictly a syntax concept
  84 -- and has NOTHING to do with the type system!).  It is very important
  85 -- to keep in mind that the scope of a bound variable extends only to
  86 -- expressions at the same depth!  To demonstrate, the following
  87 -- expression will fail to parse:
  88
  89 -- badness = \x -> <[ x ]>
  90
  91 -- ...and in the following expression, the occurrence of "x" is bound
  92 -- by the first (outer) lambda, not the second one:
  93
  94 no_shadowing_here = \x -> <[ \x -> ~~x ]>
  95
  96 -- Lastly, you can wrap code-brackets around an identifier in a
  97 -- top-level, let, or where binding.  Notice how GHC doesn't complain
  98 -- here about defining an identifier twice!
  99
 100 foo       =    \x         -> x+1
 101 <[ foo ]> = <[ \(x::Bool) -> x   ]>
 102
 103 -- Now you can use foo (the second one!) inside code-brackets:
 104
 105 bar x = <[ foo ~~x ]>
 106
 107 bar :: forall g. <[Bool]>@g -> <[Bool]>@g
 108
 109 -- In fact, the identifiers have completely unrelated types.  Which
 110 -- brings up another important point: types are ALWAYS assigned
 111 -- "relative to" depth zero.  So although we imagine "foo" existing at
 112 -- depth-one, its type is quite firmly established as <[ Bool -> Bool ]>
 113
 114 -- It has to be this way -- to see why, consider a term which is more
 115 -- polymorphic than "foo":
 116
 117 <[ foo' ]> = <[ \x -> x ]>
 118
 119 -- Its type is:
 120
 121 <[ foo' ]> :: forall a g . <[ a -> a ]>@g
 122
 123 -- ...and there's no way to express the g-polymorphism entirely from
 124 -- within the brackets.
 125
 126 -- So why does all of this matter?  Mainly so that we can continue to use .  We'd like
 127 -- the "+" operator to work "as expected" -- in other words, we'd like
 128 -- people to be able to write things like
 129
 130 increment_at_level1 = <[ \x -> x + 1 ]>
 131
 132 -- However, in unmodified haskell an identifier like (+) may have only
 133 -- one type.  In this case that type is:
 134 --
 135 --     (+) :: Num a => a -> a -> a
 136 --
 137 -- Now, we could simply decree that when (+) appears inside code
 138 -- brackets, an "implicit ~~" is inserted, so the desugared expression
 139 -- is:
 140 --
 141 --    increment_at_level1 = <[ \x -> ~~(+) x 1 ]>
 142 --
 143 -- unfortunately this isn't going to work for guest languages that
 144 -- don't have higher-order functions.  Haskell uses curried arguments
 145 -- because it has higher-order functions, but in a first-order guest
 146 -- language a more sensible type for (+) would be:
 147 --
 148 --    (+) :: Num a => (a,a) -> a
 149 --
 150 -- ... or even something less polymorphic, like
 151 --
 152 --    (+) :: (Int,Int) -> Int
 153 --
 154 -- so to maintain flexibility, we allow an identifier to have
 155 -- different types at different syntactic depths; this way type
 156 -- choices made for Haskell don't get imposed on guest languages that
 157 -- are missing some of its features.
 158 --
 159 -- In hindsight, what we REALLY want is for increment_at_level1 to
 160 -- be desugared like this (much like the Arrow (|...|) syntax):
 161 --
 162 --   increment_at_level1 = <[ \x -> ~~( <[x]> + <[1]> ) ]>
 163 --
 164 -- ... because then we can declare
 165 --
 166 --   instance Num a => Num <[a]> where ...
 167 --
 168 -- or just
 169 --
 170 --   instance Num <[Int]> where ...
 171 --
 172 -- unfortunately there's a major problem: knowing how to do this sort
 173 -- of desugaring requires knowing the *arity* of a function.  For
 174 -- symbols we can kludge it by checking Haskell's parsing rules (there
 175 -- are only a handful of unary symbols; all others are binary), but
 176 -- this is crude and won't work at all for non-symbol identifiers.
 177 -- And we can look at a type like x->y->z and say "oh, that's a
 178 -- two-argument function", but sometimes GHC doesn't know the complete
 179 -- type of an identifier in the midst of unification (i.e. "x has type
 180 -- Int->a for some a, where a could be Int or Int->Int"), so guessing
 181 -- the arity from the type cannot be done during parsing, which is
 182 -- when we need to do this.
 183 --
 184 -- Okay, I think that's more or less a brain dump of why I changed the
 185 -- scoping rules and the problems with the other solutions I tried.
 186 --
 187 -- I am very interested in hearing any suggestions on better ways of
 188 -- dealing with this, so long as you can still use operators like (+)
 189 -- in guest languages without higher-order functions.
 190 --
 191
 192
 193
 194
 195
 196
 197 -- The rest of this file contains a bunch of example programs:
 198 -- exponentiation, dot-product, a bunch of classic MetaML idioms, and
 199 -- a translation of Nanevski+Pfenning's two-stage regex matcher.
 200
 201
 202
 203
 204
 205
 206 --------------------------------------------------------------------------------
 207 -- Ye Olde and Most Venerable "pow" Function
 208
 209 pow n =
 210    if n==0
 211    then <[ \x -> 1 ]>
 212    else <[ \x -> x * ~~(pow (n - 1)) x ]>
 213
 214
 215 -- a more efficient two-level pow
 216 pow' 0  = <[ \x -> 1 ]>
 217 pow' 1  = <[ \x -> x ]>
 218 pow' n  = if   n `mod` 2==0
 219           then <[ \x -> (\y -> y*y) (~~(pow' $ n `shiftR` 2) x) ]>
 220           else <[ \x -> x * ~~(pow' $ n-1) x ]>
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232 --------------------------------------------------------------------------------
 233 -- Dot Product
 234 --
 235 --  This shows how to build a two-level program one step at a time by
 236 --  slowly rearranging it until the brackets can be inserted.
 237 --
 238
 239 -- a one-level function to compute the dot product of two vectors
 240 dotproduct :: [Int] -> [Int] -> Int
 241 dotproduct v1 v2 =
 242   case v1 of
 243     []     -> 0
 244     (a:ax) -> case v2 of
 245                    []     -> 0
 246                    (b:bx) ->
 247                        (a*b)+(dotproduct ax bx)
 248
 249 -- A slightly modified version of the dot product: note that we
 250 -- check for zeroes and ones to avoid multiplying.  In a one-level
 251 -- program this yields no advantage, however!
 252 dotproduct' :: [Int] -> [Int] -> Int
 253 dotproduct' v1 v2 =
 254   case v1 of
 255     []     -> 0
 256     (0:ax) -> case v2 of
 257                    []     -> 0
 258                    (b:bx) -> (dotproduct' ax bx)
 259     (1:ax) -> case v2 of
 260                    []     -> 0
 261                    (b:bx) -> b+(dotproduct' ax bx)
 262     (a:ax) -> case v2 of
 263                    []     -> 0
 264                    (b:bx) ->
 265                        (a*b)+(dotproduct' ax bx)
 266
 267 -- A two-level version of the dot product.  Note how we ask for the first
 268 -- vector, then produce a program which is optimized for multiplying
 269 -- by that particular vector.  If there are zeroes or ones in the
 270 -- original vector, we will emit code which is faster than a one-level
 271 -- dot product.
 272
 273 --dotproduct'' :: forall g.
 274 --                GuestLanguageAdd         g Int =>
 275 --                GuestLanguageMult        g Int =>
 276 --                GuestLanguageFromInteger g Int =>
 277 --                [Int] -> <[ [Int] -> Int ]>@g
 278 dotproduct'' v1 =
 279   case v1 of
 280     []     -> <[ \v2 -> 0 ]>
 281     (0:ax) -> <[ \v2 -> case v2 of
 282                           []     -> 0
 283                           (b:bx) -> ~~(dotproduct'' ax) bx ]>
 284     (1:ax) -> <[ \v2 -> case v2 of
 285                           []     -> 0
 286                           (b:bx) -> b + ~~(dotproduct'' ax) bx ]>
 287
 288     (a:ax) -> <[ \v2 -> case v2 of
 289                           []     -> 0
 290                           (b:bx) -> ~~(guestIntegerLiteral a) * b + ~~(dotproduct'' ax) bx ]>
 291
 292
 293
 294
 295
 296
 297 --------------------------------------------------------------------------------
 298 -- Taha-Sheard "isomorphism for code types"
 299
 300 back  :: forall a b c. (<[b]>@a -> <[c]>@a) -> <[ b->c ]>@a
 301 back  = \f -> <[ \x -> ~~(f <[x]>) ]>
 302
 303 forth :: forall a b c. <[b->c]>@a -> (<[b]>@a -> <[c]>@a)
 304 forth = \f -> \x -> <[ ~~f ~~x ]>
 305
 306
 307
 308 --------------------------------------------------------------------------------
 309 -- Examples of "running" code; these examples illustrate the sorts of
 310 -- scoping problems that the Taha-Nielsen environment classifiers look
 311 -- for in the context of HOMOGENEOUS metaprogramming.  You can't
 312 -- actually define these functions for ALL generalized arrows -- only
 313 -- those for which you've defined some sort of interpretation in Haskell.
 314
 315 run :: forall a. (forall b. <[a]>@b) -> a
 316 run = undefined
 317
 318 -- the typchecker correctly rejects this bogosity if you uncomment it:
 319 -- bogus = <[ \x -> ~~( run <[ x ]> ) ]>
 320
 321 -- The Calcano-Moggi-Taha paper on environment classifier inference
 322 -- had a special type for closed code and two special expressions
 323 -- "close" and "open".  These are unnecessary in SystemFC1 where we
 324 -- can use higher-rank polymorphism to get the same result (although
 325 -- in truth it's cheating a bit since their type inference is
 326 -- decidable with no annotations, whereas Rank-N inference is not):
 327
 328 data ClosedCode a = ClosedCode (forall b. <[a]>@b)
 329
 330 open :: forall a b. ClosedCode a -> <[a]>@b
 331 open (ClosedCode x) = x
 332
 333 close :: (forall b. <[a]>@b) -> ClosedCode a
 334 close x = ClosedCode x
 335
 336 run_closed :: ClosedCode a -> a
 337 run_closed = undefined
 338
 339
 340
 341 --------------------------------------------------------------------------------
 342 -- A two-level Regular Expression matcher, adapted from Nanevski+Pfenning, Figure 6
 343 data Regex
 344  = Empty
 345  | Plus  Regex Regex
 346  | Times Regex Regex
 347  | Star  Regex
 348  | Const Char
 349
 350 class Stream a where
 351   s_empty :: a -> Bool
 352   s_head  :: a -> Char
 353   s_tail  :: a -> a
 354
 355 -- a continuation-passing-style matcher
 356
 357 accept :: Stream s => Regex -> (s -> Bool) -> s -> Bool
 358
 359 accept Empty k s          =
 360   k s
 361
 362 accept (Plus e1 e2) k s   =
 363  (accept e1 k s) || (accept e2 k s)
 364
 365 accept (Times e1 e2) k s  =
 366  (accept e1 (accept e2 k)) s
 367
 368 accept (Star e) k s       =
 369  (k s) || (accept e (\s' -> accept (Star e) k s') s)
 370  -- FIXME: this will loop forever if you give it (Star x) where x can
 371  -- match the empty string
 372
 373 accept (Const c) k s      =
 374  if s_empty s
 375  then False
 376  else (s_head s) == c && k (s_tail s)
 377
 378 class GuestStream g a where
 379   <[ gs_empty ]> :: <[ a -> Bool ]>@g
 380   <[ gs_head  ]> :: <[ a -> Char ]>@g
 381   <[ gs_tail  ]> :: <[ a -> a    ]>@g
 382
 383 class GuestEqChar g where
 384   <[ (==) ]> :: <[ Char -> Char -> Bool ]>@g
 385
 386 staged_accept ::
 387     Regex
 388     -> forall c s.
 389          GuestStream c s =>
 390          GuestCharLiteral c =>
 391          GuestLanguageBool c =>
 392          GuestEqChar c =>
 393          <[ s -> Bool ]>@c
 394       -> <[ s -> Bool ]>@c
 395
 396 staged_accept Empty k            =
 397    <[ \s -> gs_empty s ]>
 398
 399 -- note that code for "k" gets duplicated here
 400 staged_accept (Plus e1 e2) k     =
 401    <[ \s -> (~~(staged_accept e1 k) s) || (~~(staged_accept e2 k) s) ]>
 402
 403 staged_accept (Times e1 e2) k    =
 404    <[ \s -> ~~(staged_accept e1 (staged_accept e2 k)) s ]>
 405
 406 staged_accept (Star e) k         =
 407    loop
 408     where
 409     -- loop :: <[s -> Bool]>@g
 410      loop =  <[ \s -> ~~k s || ~~(staged_accept e loop) s ]>
 411     -- note that loop is not (forall c s. <[s -> Bool]>@c)
 412     -- because "k" is free in loop; it is analogous to the free
 413     -- environment variable in Nanevski's example
 414
 415 staged_accept (Const c) k        =
 416     <[ \s -> if gs_empty s
 417              then false
 418              else (gs_head s) == ~~(guestCharLiteral c) && ~~k (gs_tail s) ]>
 419
 420 -- this type won't work unless the case for (Star e) is commented out;
 421 -- see loop above
 422 --      Regex
 423 --      -> (forall c s.
 424 --          GuestStream c s =>
 425 --          GuestLanguageBool c =>
 426 --          GuestEqChar c =>
 427 --          <[s -> Bool]>@c)
 428 --     -> (forall c s.
 429 --          GuestStream c s =>
 430 --          GuestLanguageBool c =>
 431 --          GuestEqChar c =>
 432 --          <[s -> Bool]>@c)
 433
 434
 435
 436 --------------------------------------------------------------------------------
 437 -- An example generalized arrow
 438
 439 --  *** this will be finished and posted by 14-Mar-2011; the code
 440 --  *** below is just a sketch ***
 441
 442 {-
 443 -- A verilog module is an SDoc (chunk of text) giving the module's
 444 -- definition.  The UniqueSupply avoids name clashes.
 445 data VerilogModule =
 446   VerilogModule
 447     [VerilogModule]     -- dependencies
 448     String ->           -- module name
 449     (Tree String ->        -- input port names
 450      Tree String ->        -- output port names
 451      SDoc)              -- raw verilog code for the body
 452
 453
 454 instance Show VerilogModule where
 455   show VerilogModule dep name body =
 456     "module "++name++"(FIXME)"++(body FIXME FIXME)
 457
 458 data VerilogWrappedType a =
 459   { vwt_rep :: String }
 460
 461 -- A "verilog garrow" from A to B is, concretely, the source code for a
 462 -- verilog module having input ports of type A and output ports of type B;
 463 -- the UniqueSupply lets us generate names.
 464 data GArrowVerilog a b =
 465   UniqueSupply ->
 466   VerilogWrappedType a ->
 467   VerilogWrappedType b ->
 468   GArrowVerilog SDoc
 469
 470 instance GArrow GArrowVerilog (,) where
 471   ga_id            =  VerilogModule [] "ga_id"   (\ inp outp -> zipTree ... "assign "++outp++" = "++inp)
 472   ga_comp      f g =  VerilogModule [] "ga_comp"
 473   ga_first     :: g x y -> g (x ** z) (y ** z)
 474   ga_second    f   =  ga_comp (ga_comp ga_swap (ga_first f)) ga_swap
 475   ga_cancell   f   =  VerilogModule [] "ga_cancell" (\ [in1,in2] [outp]      -> "assign "++outp++" = "++in2)
 476   ga_cancelr   f   =  VerilogModule [] "ga_cancelr" (\ [in1,in2] [outp]      -> "assign "++outp++" = "++in1)
 477   ga_uncancell f   =  VerilogModule [] "ga_cancelr" (\ [in1]     [out1,out2] -> "assign "++out1++"=1'b0;\n assign"++out2++"="++in1)
 478   ga_uncancelr f   =  VerilogModule [] "ga_cancelr" (\ [in1]     [out1,out2] -> "assign "++out2++"=1'b0;\n assign"++out1++"="++in1)
 479   ga_assoc     f   =
 480   ga_unassoc   :: g (x**(y**z)) ((x**y)**z)
 481
 482 instance GArrowDrop GArrowVerilog (,) where
 483   ga_drop      =
 484
 485 instance GArrowCopy GArrowVerilog (,) where
 486   ga_copy      =
 487
 488 instance GArrowSwap GArrowVerilog (,) where
 489   ga_swap      =
 490
 491 instance GArrowLoop GArrowVerilog (,) where
 492   ga_loop      =
 493
 494 instance GArrowLiteral GArrowVerilog (,) where
 495   ga_literal   =
 496 -}
 497
 498
 499
 500