examples/GArrowTutorial.hs

   1 {-# OPTIONS_GHC -XModalTypes -XScopedTypeVariables -XFlexibleContexts -XMultiParamTypeClasses -ddump-types -XNoMonoPatBinds #-}
   2 module GArrowTutorial
   3 where
   4 import Data.Bits
   5 import Data.Bool (not)
   6 import GHC.HetMet.CodeTypes hiding ((-))
   7 import GHC.HetMet.GArrow
   8 import Control.Category
   9 import Control.Arrow
  10 import Prelude hiding ( id, (.) )
  11
  12 -- The best way to understand heterogeneous metaprogramming and
  13 -- generalized arrows is to play around with this file, poking at the
  14 -- examples until they fail to typecheck -- you'll learn a lot that
  15 -- way!
  16
  17 -- Once you've built the modified compiler, you can compile this file
  18 -- with:
  19 --
  20 --    $ inplace/bin/ghc-stage2 tutorial.hs
  21 --
  22
  23 -- -XModalTypes adds a new syntactical expression, "code brackets":
  24 code_fst = <[ \(x,y) -> x ]>
  25
  26 -- This new expression is the introduction form for modal types:
  27 code_fst :: forall a b g. <[ (a,b) -> a ]>@g
  28
  29 -- Think of <[T]>@g as being the type of programs written in language
  30 -- "g" which, when "executed", return a value of type "T".  I mention
  31 -- "language g" because the *heterogeneous* aspect of HetMet means
  32 -- that we can limit the sorts of constructs allowed inside the code
  33 -- brackets, permitting only a subset of Haskell (you have to use
  34 -- Haskell syntax, though).
  35
  36 -- There is a second new expression form, "~~", called "escape":
  37
  38 code_fst_fst = <[ \z -> ~~code_fst (~~code_fst z) ]>
  39
  40 -- Note that ~~ binds more tightly than any other operator.  There is
  41 -- an alternate version, "~~$", which binds more weakly than any other
  42 -- operator (this is really handy sometimes!).  To demonstrate this,
  43 -- the next two expressions differ only in superficial syntax:
  44
  45 example1    foo bar = <[ ~~$ foo bar  ]>
  46 example2    foo bar = <[ ~~( foo bar) ]>
  47 -- example3 foo bar = <[ ~~  foo bar  ]>
  48
  49 -- ... but the third one is completely different (and in fact, doesn't
  50 -- even parse, but we'll get to that in a moment)
  51
  52 -- The escape operation must appear within code brackets.  In truth,
  53 -- it is really a "hole" punched in the code brackets -- the thing to
  54 -- which the escape operator gets applied is typed as if it were
  55 -- *outside* the code brackets.  It must have type <[T]>, and the
  56 -- escape operator allows it to be used *inside* code brackets as if
  57 -- it had type "T".
  58
  59 -- So, the escape operator is basically a way of pasting code
  60 -- fragments into each other.
  61
  62 -- This is where those type variables after the "@" sign come in: if
  63 -- you paste two pieces of code into a third, all three must be
  64 -- written in the same language.  We express this by unifying their
  65 -- tyvars:
  66
  67 compose_code :: forall g a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@g
  68 compose_code x y = <[ \z -> ~~y (~~x z) ]>
  69
  70 -- Now, try commenting out the type ascription above and uncommenting
  71 -- any of these three:
  72 --
  73 -- compose_code :: forall g h a b c. <[a->b]>@h -> <[b->c]>@g -> <[a->c]>@g
  74 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@h -> <[a->c]>@g
  75 -- compose_code :: forall g h a b c. <[a->b]>@g -> <[b->c]>@g -> <[a->c]>@h
  76 --
  77
  78 -- The typechecker won't let you get away with that -- you're trying
  79 -- to force a type which is "too polymorphic" onto paste2.  If the
  80 -- compiler allowed that, the resulting metaprogram might try to
  81 -- splice together programs written in different languages, resulting
  82 -- in mayhem.
  83
  84 -- NEW SCOPING RULES: The syntactical depth (or just "depth") of an
  85 -- expression is the number of surrounding code-brackets minus the
  86 -- number of surrounding escapes (this is strictly a syntax concept
  87 -- and has NOTHING to do with the type system!).  It is very important
  88 -- to keep in mind that the scope of a bound variable extends only to
  89 -- expressions at the same depth!  To demonstrate, the following
  90 -- expression will fail to parse:
  91
  92 -- badness = \x -> <[ x ]>
  93
  94 -- ...and in the following expression, the occurrence of "x" is bound
  95 -- by the first (outer) lambda, not the second one:
  96
  97 no_shadowing_here = \x -> <[ \x -> ~~x ]>
  98
  99 -- Lastly, you can wrap code-brackets around an identifier in a
 100 -- top-level, let, or where binding.  Notice how GHC doesn't complain
 101 -- here about defining an identifier twice!
 102
 103 foo       =    \x         -> x+1
 104 <[ foo ]> = <[ \(x::Bool) -> x   ]>
 105
 106 -- Now you can use foo (the second one!) inside code-brackets:
 107
 108 bar x = <[ foo ~~x ]>
 109
 110 bar :: forall g. <[Bool]>@g -> <[Bool]>@g
 111
 112 -- In fact, the identifiers have completely unrelated types.  Which
 113 -- brings up another important point: types are ALWAYS assigned
 114 -- "relative to" depth zero.  So although we imagine "foo" existing at
 115 -- depth-one, its type is quite firmly established as <[ Bool -> Bool ]>
 116
 117 -- It has to be this way -- to see why, consider a term which is more
 118 -- polymorphic than "foo":
 119
 120 <[ foo' ]> = <[ \x -> x ]>
 121
 122 -- Its type is:
 123
 124 <[ foo' ]> :: forall a g . <[ a -> a ]>@g
 125
 126 -- ...and there's no way to express the g-polymorphism entirely from
 127 -- within the brackets.
 128
 129 -- So why does all of this matter?  Mainly so that we can continue to use .  We'd like
 130 -- the "+" operator to work "as expected" -- in other words, we'd like
 131 -- people to be able to write things like
 132
 133 increment_at_level1 = <[ \x -> x + 1 ]>
 134
 135 -- However, in unmodified haskell an identifier like (+) may have only
 136 -- one type.  In this case that type is:
 137 --
 138 --     (+) :: Num a => a -> a -> a
 139 --
 140 -- Now, we could simply decree that when (+) appears inside code
 141 -- brackets, an "implicit ~~" is inserted, so the desugared expression
 142 -- is:
 143 --
 144 --    increment_at_level1 = <[ \x -> ~~(+) x 1 ]>
 145 --
 146 -- unfortunately this isn't going to work for guest languages that
 147 -- don't have higher-order functions.  Haskell uses curried arguments
 148 -- because it has higher-order functions, but in a first-order guest
 149 -- language a more sensible type for (+) would be:
 150 --
 151 --    (+) :: Num a => (a,a) -> a
 152 --
 153 -- ... or even something less polymorphic, like
 154 --
 155 --    (+) :: (Int,Int) -> Int
 156 --
 157 -- so to maintain flexibility, we allow an identifier to have
 158 -- different types at different syntactic depths; this way type
 159 -- choices made for Haskell don't get imposed on guest languages that
 160 -- are missing some of its features.
 161 --
 162 -- In hindsight, what we REALLY want is for increment_at_level1 to
 163 -- be desugared like this (much like the Arrow (|...|) syntax):
 164 --
 165 --   increment_at_level1 = <[ \x -> ~~( <[x]> + <[1]> ) ]>
 166 --
 167 -- ... because then we can declare
 168 --
 169 --   instance Num a => Num <[a]> where ...
 170 --
 171 -- or just
 172 --
 173 --   instance Num <[Int]> where ...
 174 --
 175 -- unfortunately there's a major problem: knowing how to do this sort
 176 -- of desugaring requires knowing the *arity* of a function.  For
 177 -- symbols we can kludge it by checking Haskell's parsing rules (there
 178 -- are only a handful of unary symbols; all others are binary), but
 179 -- this is crude and won't work at all for non-symbol identifiers.
 180 -- And we can look at a type like x->y->z and say "oh, that's a
 181 -- two-argument function", but sometimes GHC doesn't know the complete
 182 -- type of an identifier in the midst of unification (i.e. "x has type
 183 -- Int->a for some a, where a could be Int or Int->Int"), so guessing
 184 -- the arity from the type cannot be done during parsing, which is
 185 -- when we need to do this.
 186 --
 187 -- Okay, I think that's more or less a brain dump of why I changed the
 188 -- scoping rules and the problems with the other solutions I tried.
 189 --
 190 -- I am very interested in hearing any suggestions on better ways of
 191 -- dealing with this, so long as you can still use operators like (+)
 192 -- in guest languages without higher-order functions.
 193 --
 194
 195 --------------------------------------------------------------------------------
 196 -- Ye Olde and Most Venerable "pow" Function
 197
 198 pow :: forall c. GuestIntegerLiteral c => GuestLanguageMult c Integer => Integer -> <[ Integer -> Integer ]>@c
 199 pow n =
 200    if n==0
 201    then <[ \x -> 1 ]>
 202    else <[ \x -> x * ~~(pow (n - 1)) x ]>
 203
 204
 205 -- a more efficient two-level pow
 206 pow' :: forall c. GuestIntegerLiteral c => GuestLanguageMult c Integer => Integer -> <[ Integer -> Integer ]>@c
 207 pow' 0  = <[ \x -> 1 ]>
 208 pow' 1  = <[ \x -> x ]>
 209 pow' n  = if   n `mod` 2==0
 210           then <[ \x -> (\y -> y*y) (~~(pow' $ n `shiftR` 2) x) ]>
 211           else <[ \x -> x * ~~(pow' $ n-1) x ]>
 212
 213
 214