Add ndp docs

[ghc-hetmet.git] / docs / ndp / vect.tex
diff --git a/docs/ndp/vect.tex b/docs/ndp/vect.tex

new file mode 100644 (file)

index 0000000..06d8537
--- /dev/null
+++ b/docs/ndp/vect.tex
@@ -0,0 +1,363 @@
+\documentclass{article}
+\usepackage{haskell}
+
+\hscommand{\vect}[1]{(#1)_v}
+\hscommand{\lift}[2]{(#1)^{\uparrow#2}}
+\hscommand{\liftn}[1]{(#1)^{\uparrow{n}}}
+
+\hscommand{\patype}[1]{\mathsf{patype}\langle#1\rangle}
+\hscommand{\pa}[1]{\mathsf{pa}\langle#1\rangle}
+
+\hscommand{\capp}{\mathbin{{\$}{:}}}
+\hscommand{\cappP}{\mathbin{{\$}{:}^\uparrow}}
+
+\hscommand{\parr}[1]{[{:}{:}#1{:}{:}]}
+\hscommand{\pparr}[1]{[{:}#1{:}]}
+
+\hscommand{\Data}{\hskwd{data}}
+\hscommand{\DataF}{\hskwd{data~family}}
+\hscommand{\DataI}{\hskwd{data~instance}}
+\hscommand{\NewtypeI}{\hskwd{newtype~instance}}
+
+\setlength{\parindent}{0cm}
+
+\begin{document}
+
+\section*{Library}
+
+\subsection*{Parallel arrays}
+
+We distinguish between user-visible, parametric arrays (\<\pparr{\cdot}\>) and 
+flattened parallel arrays (\<\parr{\cdot}\>) which are internal to our
+implementation. Operations on the former have purely sequential semantics.
+During vectorisation, they are replaced by parallel arrays and operations.
+
+\begin{haskell}
+\Data \pparr{\alpha} = Array Int \alpha  \hscom{or similar} \\
+\DataF \parr{\alpha}
+\end{haskell}
+
+\subsection*{\<PA\> dictionaries}
+
+To avoid problems with using typeclasses in Core, we use explicit records for
+representing dictionaries of type-dependent operations on parallel arrays:
+
+\begin{haskell}
+\Data PA \alpha = & PA \{
+  \hsbody{lengthP & :: \parr{\alpha}\to{Int} \\
+          replicateP & :: Int\to\alpha\to\parr{\alpha} \\
+          \ldots \\ \}}
+\end{haskell}
+
+In vectorised code, polymorphic functions must be supplied with a \<PA\>
+dictionary for each type varialbe. For instance, \<\Lambda\alpha.e\> turns
+into \<\Lambda\alpha.\lambda{dPA_\alpha}::PA \alpha.e'\>.
+
+For higher-kinded type variables, we expect a function of appropriate type
+which computes the dictionary for a saturated application of the type
+variable from the dictionaries of the arguments. For instance,
+\<\Lambda{m}::{*}\to{*}.e\> turns into
+\<\Lambda{m}::{*}\to{*}.\lambda{dPA_m}::(\forall\alpha::{*}.PA \alpha\to{PA}
+(m a)).e'\>.
+In general, the type of the \<dPA\> argument for a type \<\sigma::\kappa\> is
+given by
+
+\begin{haskell}
+\patype{\sigma:{*}} & = PA \sigma \\
+\patype{\sigma:\kappa_1\to\kappa_2} & =
+\forall\alpha:\kappa_1.\patype{\alpha:\kappa_1}\to\patype{\sigma \alpha:\kappa_2}
+\end{haskell}
+
+For each user-defined or built-in type constructor \<T\> we
+automatically define its dictionary \<dPA_T::\patype{T}\>. Moreover, for every
+in-scope type variable \<\alpha\> we have its dictionary
+\<dPA_\alpha::\patype{\alpha}\>. This allows us to compute the dictionary for
+an arbitrary type as follows:
+
+\begin{haskell}
+\pa{T}           & = dPA_T \\
+\pa{\alpha}      & = dPA_{\alpha} \\
+\pa{\sigma \phi} & = \pa{\sigma}[\phi] \pa{\phi} \\
+\pa{\forall\alpha::\kappa.\sigma} & =
+\Lambda\alpha::\kappa.\lambda{dPA_{\alpha}}::\patype{\alpha::\kappa}.\pa{\sigma}
+\end{haskell}
+
+\subsection*{Closures}
+
+Closures are represented as follows:
+
+\begin{haskell}
+\Data & Clo \alpha \beta & = \exists\gamma. Clo & (PA \gamma) \gamma
+ & (\gamma\to\alpha\to\beta) (\parr{\gamma}\to\parr{\alpha}\to\parr{\beta}) \\
+\DataI & \parr{Clo \alpha \beta} & = \exists\gamma. AClo & (PA \gamma)
+  \parr{\gamma}
+  & (\gamma\to\alpha\to\beta) (\parr{\gamma}\to\parr{\alpha}\to\parr{\beta}) 
+\end{haskell}
+
+Closure application is implemented by the following two operators:
+
+\begin{haskell}
+({\capp}) & :: Clo \alpha \beta \to \alpha \to \beta \\
+({\cappP}) & :: \parr{Clo \alpha \beta} \to \parr{\alpha} \to \parr{\beta}
+\end{haskell}
+
+Note that \<({\cappP})\> is effectively the lifted version of \<({\capp})\>.
+
+\subsection*{Flat array representation}
+
+Some important instances of the \<\parr{\cdot}\> family:
+
+\begin{haskell}
+\hskwd{data} & \hskwd{instance} \parr{(\alpha_1,\ldots,\alpha_n)}
+  & = ATup_n !Int \parr{\alpha_1} \ldots \parr{\alpha_n} \\
+\hskwd{newtype}~ & \hskwd{instance} \parr{\alpha\to\beta}
+  & = AFn (\parr{\alpha}\to\parr{\beta}) \\
+\hskwd{newtype} & \hskwd{instance} \parr{PA \alpha}
+  & = APA (PA \alpha)
+\end{haskell}
+
+The last two definitions are discussed later.
+
+\section*{Vectorisation}
+
+\subsection*{Types}
+
+We assume that for each type constructor \<T\>, there exists a vectorised
+version \<T_v\> (which can be the same as \<T\>). In particular, we have
+\begin{haskell}
+({\to}_v)       & = Clo \\
+\pparr{\cdot}_v & = \parr{\cdot}
+\end{haskell}
+
+Vectorisation of types is defined as follows:
+
+\begin{haskell}
+\vect{T}      & = T_v \\
+\vect{\alpha} & = \alpha \\
+\vect{\sigma \phi} & = \vect{\sigma} \vect{\phi} \\
+\vect{\forall\alpha:\kappa.\sigma} & = \forall\alpha:\kappa.\patype{\alpha:\kappa}\to\vect{\sigma}
+\end{haskell}
+
+\subsection*{Data type declarations}
+
+\begin{haskell}
+\vect{\hskwd{data} T = \overline{C t_1 \ldots t_n}} = \hskwd{data} T_v =
+\overline{C_v \vect{t_1} \ldots \vect{t_n}}
+\end{haskell}
+
+\subsection*{Terms}
+
+We distinguish between local variables (\<x\>) and global variables and
+literals \<c\>.  We assume that we have the following typing judgement:
+
+\begin{haskell}
+\Delta,\Gamma\vdash{e}:\sigma
+\end{haskell}
+
+Here, \<\Delta\> assigns types to globals and \<\Gamma\> to locals. Moreover,
+we assume that for each global variable \<c\>, there exists a
+vectorised version \<c_v\>, i.e.,
+
+\begin{haskell}
+c:\sigma\in\Delta \Longrightarrow c_v:\vect{\sigma}\in\Delta
+\end{haskell}
+
+\subsubsection*{Vectorisation}
+
+\begin{haskell}
+\vect{c} & = c_v & c is global \\
+\vect{x} & = x   & x is local \\
+\vect{\Lambda\alpha:\kappa.e} & = 
+\Lambda\alpha:\kappa.\lambda{dPA_{\alpha}}:\patype{\alpha:\kappa}.\vect{e} \\
+\vect{e[\sigma]} & = \vect{e}[\vect{\sigma}] \\
+\vect{e_1 e_2} & = \vect{e_1}\capp\vect{e_2} \\
+\vect{\lambda{x}:\sigma.e} & = Clo \vect{\sigma} \vect{\phi} \tau \pa{\tau}
+                                   (y_1,\dots,y_n) \\
+  &
+\quad\quad(\lambda{ys}:\tau.
+           \lambda{x}:\vect{\sigma}.
+           \hskwd{case} ys \hskwd{of} (y_1,\dots,y_n) \to \vect{e}) \\
+  &
+\quad\quad(\lambda{ys}:\parr{\tau}.
+           \lambda{x}:\parr{\vect{\sigma}}.
+           \hskwd{case} ys \hskwd{of} ATup_n l y_1 \dots y_n \to \lift{e}{l})
+\\
+ \hswhere{e has type \phi \\
+          \{y_1:\tau_1,\dots,y_n:\tau_n\} & = FVS(e)\setminus{x} \\
+          \tau & = (\vect{\tau_1},\dots,\vect{\tau_n})}
+% \\
+%          e has type \phi}
+\end{haskell}
+
+Vectorisation maintains the following invariant:
+
+\begin{haskell}
+\Delta,\Gamma\vdash{e}:\sigma \Longrightarrow
+      \Delta,\Gamma_v\vdash\vect{e}:\vect{\sigma}
+\end{haskell}
+
+where \<\Gamma_v\> is defined by
+
+\begin{haskell}
+x:\sigma\in\Gamma \Longleftrightarrow x:\vect{\sigma}\in\Gamma_v
+\end{haskell}
+
+\subsubsection*{Lifting}
+\begin{haskell}
+\liftn{c:\sigma} & = replicateP \pa{\vect{\sigma}} n c_v \\
+\liftn{x}        & = x \\
+\liftn{\Lambda\alpha:\kappa.e} & =
+\Lambda\alpha:\kappa.\lambda{dPA_{\alpha}}:\patype{\alpha:\kappa}.\liftn{e} \\
+\liftn{e[\sigma]} &  = \liftn{e}[\vect{\sigma}] \pa{\vect{\sigma}} \\
+\liftn{e_1 e_2} & = \liftn{e_1} \cappP \liftn{e_2} \\
+\liftn{\lambda{x}:\sigma.e} & = AClo \vect{\sigma} \vect{\phi} \vect{\tau}
+                                   \pa{\vect{\tau}}
+                                   (ATup_n y_1 \dots y_n) \\
+  &
+\quad\quad(\lambda{ys}:\vect{\tau}.
+           \lambda{x}:\vect{\sigma}.
+           \hskwd{case} ys \hskwd{of} (y_1,\dots,y_n) \to \vect{e}) \\
+  &
+\quad\quad(\lambda{ys}:\parr{\vect{\tau}}.
+           \lambda{x}:\parr{\vect{\sigma}}.
+           \hskwd{case} ys \hskwd{of} ATup_n l y_1 \dots y_n \to \lift{e}{l})
+ \hswhere{e has type \phi \\
+          \{y_1:\tau_1,\dots,y_n:\tau_n\} & = FVS(e)\setminus{x} \\
+          \tau & = (\tau_1,\dots,\tau_n)}
+\end{haskell}
+
+Lifting maintains the following invariant:
+
+\begin{haskell}
+\Delta,\Gamma\vdash{e}:\sigma \Longrightarrow
+  \Delta,\Gamma^\uparrow\vdash\liftn{e} : \parr{\sigma_v}
+\end{haskell}
+
+where
+
+\begin{haskell}
+x:\sigma\in\Gamma \Longleftrightarrow x:\parr{\vect{\sigma}}\in\Gamma^\uparrow
+\end{haskell}
+
+Note that this is precisely the reason for the \<\parr{\cdot}\> instances for
+\<\alpha\to\beta\> and \<PA \alpha\>. A term of type \<\forall\alpha.\sigma\>
+will be lifted to a term of type
+\<\parr{\forall\alpha.PA \alpha\to\vect{\sigma}}\> which requires the
+instances. Apart from closures, these are the only occurences of \<({\to})\> in
+the transformed program, however.
+
+
+\section*{What to vectorise?}
+
+An expression is vectorisable if it only mentions globals and type constructors
+which have a vectorised form. When vectorising, we look for maximal
+vectorisable subexpressions and transform those. For instance, assuming that
+\<print\> hasn't been vectorised, in
+
+\begin{haskell}
+main = \hsdo{
+         print (sumP \pparr{\ldots}) \\
+         print (mapP \ldots \pparr{\ldots})}
+\end{haskell}
+
+we would vectorise the arguments to \<print\>.  Note that this implies that we
+never call non-vectorised code from vectorised code (is that a problem?).
+
+Whenever we come out of a vectorised ``bubble'', we have to convert between
+vectorised and unvectorised data types. The examples above would roughly
+translate to
+
+\begin{haskell}
+main = \hsdo{
+  print (unvect (sumP_v \parr{\ldots})) \\
+  print (unvect (mapP_v \ldots \parr{\ldots}))}
+\end{haskell}
+
+For this, we have to have the following functions:
+
+\begin{haskell}
+vect_\sigma & :: \sigma\to\vect{\sigma} \\
+unvect_\sigma & :: \vect{\sigma}\to\sigma
+\end{haskell}
+
+It is sufficient to have these functions only for a restricted set of types as
+we can always vectorise less if the conversions becomes too complex.
+
+Sometimes, it doesn't make sense to vectorise things. For instance, in
+
+\begin{haskell}
+foo f xs = print (f xs)
+\end{haskell}
+
+we wouldn't vectorise \<f xs\>. Unfortunately, this means that
+\<foo (mapP f) xs\> will be purely sequential.
+
+For each expression, the vectoriser gives one of the following answers.
+
+\begin{tabular}{lp{10cm}}
+\textbf{Yes} &
+the expression can be (and has been) vectorised \\
+\textbf{Maybe} &
+the expression can be vectorised but it doesn't make sense to do so
+unless it is used in a vectrorisable context (e.g., for \<f xs\> in \<foo\>)
+\\
+\textbf{No} &
+the expression can't be vectorised (although parts of it can, so we
+still get back a transformed expression)
+\end{tabular}
+
+\subsection*{Top-level definitions}
+
+For a top-level definition of the form
+
+\begin{haskell}
+f :: \sigma = e
+\end{haskell}
+
+vectorisation proceeds as follows.
+
+\begin{itemize}
+\item If \<e\> can be fully vectorised, we generate
+\begin{haskell}
+f_v :: \vect{\sigma} = \vect{e}
+\end{haskell}
+
+\item If it doesn't always make sense to vectorise \<e\>, i.e., the vectoriser
+returned \textbf{Maybe}, we leave the definition of \<f\> unchanged. Thus, we
+would not change
+\begin{haskell}
+({\$}) = \lambda{f}.\lambda{x}.f x
+\end{haskell}
+but would additionaly generate
+\begin{haskell}
+({\$}_v) = Clo \ldots
+\end{haskell}
+
+\item Otherwise (if the vectoriser said \textbf{Yes}) and we have
+\<unconv_\sigma\>, we change the definition of \<f\> to
+\begin{haskell}
+f :: \sigma = unconv_\sigma f_v
+\end{haskell}
+
+\item Otherwise (the vectoriser said \textbf{Yes} but we do not have
+\<unconv_\sigma\> or if \<e\> couldn't be fully vectorised), we change the
+definition of \<f\> to
+\begin{haskell}
+f :: \sigma = e'
+\end{haskell}
+where \<e'\> is obtaining by vectorising \<e\> as much as possible without
+changing its type. For instance, for
+\begin{haskell}
+f = \lambda{g}.\lambda{x}.mapP (\ldots) (g x)
+\end{haskell}
+we would generate
+\begin{haskell}
+f & = \lambda{g}.\lambda{x}.unvect (mapP_v (\ldots) (vect (g x))) \\
+f_v & = Clo \ldots
+\end{haskell}
+assuming we have the necessary conversions but cannot convert functions (i.e.,
+\<g\>).
+\end{itemize}
+
+\end{document}
+