\documentclass{article}

\hscommand{\vect}[1]{(#1)_v}
\hscommand{\lift}[2]{(#1)^{\uparrow#2}}
\hscommand{\liftn}[1]{(#1)^{\uparrow{n}}}

\hscommand{\patype}[1]{\mathsf{patype}\langle#1\rangle}
\hscommand{\pa}[1]{\mathsf{pa}\langle#1\rangle}

\hscommand{\capp}{\mathbin{{\$}{:}}}
\hscommand{\cappP}{\mathbin{{\$}{:}^\uparrow}}

\hscommand{\parr}[1]{[{:}{:}#1{:}{:}]}
\hscommand{\pparr}[1]{[{:}#1{:}]}

\hscommand{\Data}{\hskwd{data}}
\hscommand{\DataF}{\hskwd{data~family}}
\hscommand{\DataI}{\hskwd{data~instance}}
\hscommand{\NewtypeI}{\hskwd{newtype~instance}}

\setlength{\parindent}{0cm}
\subsection*{Parallel arrays}

We distinguish between user-visible, parametric arrays (\<\pparr{\cdot}\>) and
flattened parallel arrays (\<\parr{\cdot}\>), which are internal to our
implementation. Operations on the former have purely sequential semantics.
During vectorisation, they are replaced by parallel arrays and operations.

\Data \pparr{\alpha} = Array Int \alpha \hscom{or similar} \\
\subsection*{\<PA\> dictionaries}

To avoid problems with using type classes in Core, we use explicit records to
represent dictionaries of type-dependent operations on parallel arrays:

\Data PA \alpha = & PA \{
\hsbody{lengthP & :: \parr{\alpha}\to{Int} \\
replicateP & :: Int\to\alpha\to\parr{\alpha} \\
\ldots \}}
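As a concrete (if much simplified) illustration, such a record can be modelled in plain Haskell. Here \<\parr{\cdot}\> is played by an ordinary list, and the names \<PArr\> and \<dPA_{Int}\> are ours, not the actual implementation's:

```haskell
-- Illustrative model of PA dictionaries as explicit records.
-- PArr stands in for the flat array type [:a:]; a plain list is
-- used only to keep the sketch self-contained.
newtype PArr a = PArr { unPArr :: [a] }

data PA a = PA
  { lengthP    :: PArr a -> Int
  , replicateP :: Int -> a -> PArr a
  }

-- The dictionary for Int (hypothetical name dPA_Int)
dPA_Int :: PA Int
dPA_Int = PA
  { lengthP    = length . unPArr
  , replicateP = \n x -> PArr (replicate n x)
  }
```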
In vectorised code, polymorphic functions must be supplied with a \<PA\>
dictionary for each type variable. For instance, \<\Lambda\alpha.e\> turns
into \<\Lambda\alpha.\lambda{dPA_\alpha}::PA \alpha.e'\>.

For higher-kinded type variables, we expect a function of appropriate type
which computes the dictionary for a saturated application of the type
variable from the dictionaries of the arguments. For instance,
\<\Lambda{m}::{*}\to{*}.e\> turns into
\<\Lambda{m}::{*}\to{*}.\lambda{dPA_m}::(\forall\alpha::{*}.PA \alpha\to{PA} (m \alpha)).e'\>.
In general, the type of the \<dPA\> argument for a type \<\sigma::\kappa\> is

\patype{\sigma:{*}} & = PA \sigma \\
\patype{\sigma:\kappa_1\to\kappa_2} & =
\forall\alpha:\kappa_1.\patype{\alpha:\kappa_1}\to\patype{\sigma \alpha:\kappa_2}
For each user-defined or built-in type constructor \<T\>, we
automatically define its dictionary \<dPA_T::\patype{T}\>. Moreover, for every
in-scope type variable \<\alpha\>, we have its dictionary
\<dPA_\alpha::\patype{\alpha}\>. This allows us to compute the dictionary for
an arbitrary type as follows:
\pa{\alpha} & = dPA_{\alpha} \\
\pa{\sigma \phi} & = \pa{\sigma}[\phi] \pa{\phi} \\
\pa{\forall\alpha::\kappa.\sigma} & =
\Lambda\alpha::\kappa.\lambda{dPA_{\alpha}}::\patype{\alpha::\kappa}.\pa{\sigma}
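To make these two definitions concrete: in the list-based model below (the names and the drastically trimmed dictionary are ours, not the implementation's), the dictionary for \<Maybe\> at kind \<{*}\to{*}\> is a function between dictionaries, and \<\pa{Maybe\ Int}\> is literally the application of that function to the \<Int\> dictionary:

```haskell
newtype PArr a = PArr [a]                    -- stand-in for [:a:]
data PA a = PA { lengthP :: PArr a -> Int }  -- drastically trimmed dictionary

-- patype<Int : *> = PA Int
dPA_Int :: PA Int
dPA_Int = PA (\(PArr xs) -> length xs)

-- patype<Maybe : * -> *> = forall a. PA a -> PA (Maybe a)
dPA_Maybe :: PA a -> PA (Maybe a)
dPA_Maybe _ = PA (\(PArr xs) -> length xs)

-- pa<Maybe Int> = pa<Maybe>[Int] pa<Int>: apply the constructor's
-- dictionary function to the argument's dictionary
dPA_MaybeInt :: PA (Maybe Int)
dPA_MaybeInt = dPA_Maybe dPA_Int
```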
\subsection*{Closures}

Closures are represented as follows:

\Data & Clo \alpha \beta & = \exists\gamma. Clo & (PA \gamma) \gamma
& (\gamma\to\alpha\to\beta) (\parr{\gamma}\to\parr{\alpha}\to\parr{\beta}) \\
\DataI & \parr{Clo \alpha \beta} & = \exists\gamma. AClo & (PA \gamma) \parr{\gamma}
& (\gamma\to\alpha\to\beta) (\parr{\gamma}\to\parr{\alpha}\to\parr{\beta})
Closure application is implemented by the following two operators:

({\capp}) & :: Clo \alpha \beta \to \alpha \to \beta \\
({\cappP}) & :: \parr{Clo \alpha \beta} \to \parr{\alpha} \to \parr{\beta}

Note that \<({\cappP})\> is effectively the lifted version of \<({\capp})\>.
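A list-based sketch of this representation and the two application operators (the \<PA \gamma\> field is omitted for brevity; field order and names are ours):

```haskell
{-# LANGUAGE ExistentialQuantification #-}

-- Clo packs an existential environment with a scalar and a lifted body.
data Clo a b = forall e. Clo (e -> a -> b) ([e] -> [a] -> [b]) e

-- An array of closures: one shared code pair, an array of environments.
data AClo a b = forall e. AClo (e -> a -> b) ([e] -> [a] -> [b]) [e]

-- ($:) applies the scalar body to the stored environment.
capp :: Clo a b -> a -> b
capp (Clo f _ env) = f env

-- ($:^) applies the lifted body elementwise; it is the lifted ($:).
cappP :: AClo a b -> [a] -> [b]
cappP (AClo _ fl envs) = fl envs
```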
\subsection*{Flat array representation}

Some important instances of the \<\parr{\cdot}\> family:

\hskwd{data} & \hskwd{instance} \parr{(\alpha_1,\ldots,\alpha_n)}
& = ATup_n !Int \parr{\alpha_1} \ldots \parr{\alpha_n} \\
\hskwd{newtype}~ & \hskwd{instance} \parr{\alpha\to\beta}
& = AFn (\parr{\alpha}\to\parr{\beta}) \\
\hskwd{newtype} & \hskwd{instance} \parr{PA \alpha}
& = APA (PA \alpha)

The last two definitions are discussed later.
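These instances can be written down directly with GHC's \<TypeFamilies\> extension. In the sketch below the pair instance is fixed at arity two and the \<Int\> instance is our own addition, so that the family can be exercised:

```haskell
{-# LANGUAGE TypeFamilies #-}

-- Flat representation chosen per element type.
data family PArr a

-- Pairs: a length plus one flat array per component.
data instance PArr (a, b) = ATup2 !Int (PArr a) (PArr b)

-- Functions: represented by their lifted versions.
newtype instance PArr (a -> b) = AFn (PArr a -> PArr b)

-- A base instance so the sketch can be run.
newtype instance PArr Int = AInt [Int]

lengthInt :: PArr Int -> Int
lengthInt (AInt xs) = length xs
```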
\section*{Vectorisation}

We assume that for each type constructor \<T\>, there exists a vectorised
version \<T_v\> (which can be the same as \<T\>). In particular, we have

\pparr{\cdot}_v & = \parr{\cdot}
Vectorisation of types is defined as follows:

\vect{\alpha} & = \alpha \\
\vect{\sigma \phi} & = \vect{\sigma} \vect{\phi} \\
\vect{\forall\alpha:\kappa.\sigma} & = \forall\alpha:\kappa.\patype{\alpha:\kappa}\to\vect{\sigma}
\subsection*{Data type declarations}

\vect{\hskwd{data} T = \overline{C t_1 \ldots t_n}} = \hskwd{data} T_v =
\overline{C_v \vect{t_1} \ldots \vect{t_n}}
We distinguish between local variables (\<x\>) and global variables and
literals (\<c\>). We assume that we have the following typing judgement:

\Delta,\Gamma\vdash{e}:\sigma

Here, \<\Delta\> assigns types to globals and \<\Gamma\> to locals. Moreover,
we assume that for each global variable \<c\>, there exists a
vectorised version \<c_v\>, i.e.,

c:\sigma\in\Delta \Longrightarrow c_v:\vect{\sigma}\in\Delta
\subsubsection*{Vectorisation}

\vect{c} & = c_v & c is global \\
\vect{x} & = x & x is local \\
\vect{\Lambda\alpha:\kappa.e} & =
\Lambda\alpha:\kappa.\lambda{dPA_{\alpha}}:\patype{\alpha:\kappa}.\vect{e} \\
\vect{e[\sigma]} & = \vect{e}[\vect{\sigma}] \\
\vect{e_1 e_2} & = \vect{e_1}\capp\vect{e_2} \\
\vect{\lambda{x}:\sigma.e} & = Clo \vect{\sigma} \vect{\phi} \tau \pa{\tau} \\
\quad\quad(y_1,\dots,y_n) \\
\quad\quad(\lambda{ys}:\tau.
\lambda{x}:\vect{\sigma}.
\hskwd{case} ys \hskwd{of} (y_1,\dots,y_n) \to \vect{e}) \\
\quad\quad(\lambda{ys}:\parr{\tau}.
\lambda{x}:\parr{\vect{\sigma}}.
\hskwd{case} ys \hskwd{of} ATup_n l y_1 \dots y_n \to \lift{e}{l})
\hswhere{e has type \phi \\
\{y_1:\tau_1,\dots,y_n:\tau_n\} & = FVS(e)\setminus\{x\} \\
\tau & = (\vect{\tau_1},\dots,\vect{\tau_n})}
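Worked by hand on a tiny term: vectorising \<\lambda{x}:Int.x+y\> (with free variable \<y\>) following the rule above yields a closure whose environment is the tuple of free variables. Lists again model \<\parr{\cdot}\> and the dictionary field is dropped; all names are ours:

```haskell
{-# LANGUAGE ExistentialQuantification #-}

data Clo a b = forall e. Clo (e -> a -> b) ([e] -> [a] -> [b]) e

capp :: Clo a b -> a -> b
capp (Clo f _ env) = f env

-- vect<\x -> x + y>: the environment is (y); the scalar body matches
-- the original term, the lifted body is its elementwise counterpart.
vectAdd :: Int -> Clo Int Int
vectAdd y =
  Clo (\ys x -> ys + x)               -- scalar body; ys plays (y_1)
      (\yss xs -> zipWith (+) yss xs) -- lifted body over [: :]
      y                               -- packaged free variables
```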
Vectorisation maintains the following invariant:

\Delta,\Gamma\vdash{e}:\sigma \Longrightarrow
\Delta,\Gamma_v\vdash\vect{e}:\vect{\sigma}

where \<\Gamma_v\> is defined by

x:\sigma\in\Gamma \Longleftrightarrow x:\vect{\sigma}\in\Gamma_v
\subsubsection*{Lifting}

\liftn{c:\sigma} & = replicateP \pa{\vect{\sigma}} n c_v \\
\liftn{x} & = x \\
\liftn{\Lambda\alpha:\kappa.e} & =
\Lambda\alpha:\kappa.\lambda{dPA_{\alpha}}:\patype{\alpha:\kappa}.\liftn{e} \\
\liftn{e[\sigma]} & = \liftn{e}[\vect{\sigma}] \pa{\vect{\sigma}} \\
\liftn{e_1 e_2} & = \liftn{e_1} \cappP \liftn{e_2} \\
\liftn{\lambda{x}:\sigma.e} & = AClo \vect{\sigma} \vect{\phi} \vect{\tau} \pa{\vect{\tau}} \\
\quad\quad(ATup_n y_1 \dots y_n) \\
\quad\quad(\lambda{ys}:\vect{\tau}.
\lambda{x}:\vect{\sigma}.
\hskwd{case} ys \hskwd{of} (y_1,\dots,y_n) \to \vect{e}) \\
\quad\quad(\lambda{ys}:\parr{\vect{\tau}}.
\lambda{x}:\parr{\vect{\sigma}}.
\hskwd{case} ys \hskwd{of} ATup_n l y_1 \dots y_n \to \lift{e}{l})
\hswhere{e has type \phi \\
\{y_1:\tau_1,\dots,y_n:\tau_n\} & = FVS(e)\setminus\{x\} \\
\tau & = (\tau_1,\dots,\tau_n)}
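The two simplest lifting rules can be seen over plain lists: a constant lifts to \<n\> copies of its vectorised form, and an application lifts to pointwise application (which is exactly what \<({\cappP})\> provides for closures). The function names are ours:

```haskell
-- liftn<c> = replicateP n c_v: a constant becomes n copies.
liftConst :: Int -> a -> [a]
liftConst n c = replicate n c

-- liftn<e1 e2> = liftn<e1> $:^ liftn<e2>: pointwise application.
liftApp :: [a -> b] -> [a] -> [b]
liftApp = zipWith ($)
```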
Lifting maintains the following invariant:

\Delta,\Gamma\vdash{e}:\sigma \Longrightarrow
\Delta,\Gamma^\uparrow\vdash\liftn{e} : \parr{\vect{\sigma}}

where \<\Gamma^\uparrow\> is defined by

x:\sigma\in\Gamma \Longleftrightarrow x:\parr{\vect{\sigma}}\in\Gamma^\uparrow

Note that this is precisely the reason for the \<\parr{\cdot}\> instances for
\<\alpha\to\beta\> and \<PA \alpha\>: a term of type \<\forall\alpha.\sigma\>
is lifted to a term of type
\<\parr{\forall\alpha.PA \alpha\to\vect{\sigma}}\>, which requires these
instances. Apart from closures, however, these are the only occurrences of
\<({\to})\> in the transformed program.
\section*{What to vectorise?}

An expression is vectorisable if it only mentions globals and type constructors
which have a vectorised form. When vectorising, we look for maximal
vectorisable subexpressions and transform those. For instance, assuming that
\<print\> hasn't been vectorised, in
\hsbody{print (sumP \pparr{\ldots}) \\
print (mapP \ldots \pparr{\ldots})}
we would vectorise the arguments to \<print\>. Note that this implies that we
never call non-vectorised code from vectorised code (is that a problem?).
Whenever we come out of a vectorised ``bubble'', we have to convert between
vectorised and unvectorised data types. The examples above would roughly become

\hsbody{print (unvect (sumP_v \parr{\ldots})) \\
print (unvect (mapP_v \ldots \parr{\ldots}))}
For this, we need the following functions:

vect_\sigma & :: \sigma\to\vect{\sigma} \\
unvect_\sigma & :: \vect{\sigma}\to\sigma

It is sufficient to have these functions only for a restricted set of types, as
we can always vectorise less if the conversions become too complex.
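For a first-order type the conversions are plain re-representations. A sketch at type \<\pparr{Int}\>, with the user-visible array modelled by a list and all names hypothetical:

```haskell
-- Flat representation of [:Int:] vs. the user-visible array
-- (modelled by a plain list here).
newtype AInt = AInt [Int]

vectIntArr :: [Int] -> AInt      -- vect at type [:Int:]
vectIntArr = AInt

unvectIntArr :: AInt -> [Int]    -- unvect at type [:Int:]
unvectIntArr (AInt xs) = xs
```

The round trip is the identity, which is what lets vectorised and unvectorised code exchange arrays at the boundary.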
Sometimes it doesn't make sense to vectorise things. For instance, in

foo f xs = print (f xs)

we wouldn't vectorise \<f xs\>. Unfortunately, this means that
\<foo (mapP f) xs\> will be purely sequential.
For each expression, the vectoriser gives one of the following answers.

\begin{tabular}{lp{10cm}}
\textbf{Yes} & the expression can be (and has been) vectorised \\
\textbf{Maybe} & the expression can be vectorised but it doesn't make sense to do so
unless it is used in a vectorisable context (e.g., for \<f xs\> in \<foo\>) \\
\textbf{No} & the expression can't be vectorised (although parts of it can, so we
still get back a transformed expression)
\end{tabular}
\subsection*{Top-level definitions}

For a top-level definition of the form

f :: \sigma = e

vectorisation proceeds as follows.
\item If \<e\> can be fully vectorised, we generate

f_v :: \vect{\sigma} = \vect{e}
\item If it doesn't always make sense to vectorise \<e\>, i.e., the vectoriser
returned \textbf{Maybe}, we leave the definition of \<f\> unchanged. Thus, we keep

({\$}) = \lambda{f}.\lambda{x}.f x

but would additionally generate

({\$}_v) = Clo \ldots
\item Otherwise, if the vectoriser said \textbf{Yes} and we have
\<unvect_\sigma\>, we change the definition of \<f\> to

f :: \sigma = unvect_\sigma f_v
\item Otherwise (the vectoriser said \textbf{Yes} but we do not have
\<unvect_\sigma\>, or \<e\> couldn't be fully vectorised), we change the
definition of \<f\> to

f :: \sigma = e'

where \<e'\> is obtained by vectorising \<e\> as much as possible without
changing its type. For instance, for

f = \lambda{g}.\lambda{x}.mapP (\ldots) (g x)

we would generate

f & = \lambda{g}.\lambda{x}.unvect (mapP_v (\ldots) (vect (g x))) \\

assuming we have the necessary conversions but cannot convert functions (i.e.,