27-aug update

[fleet.git] / am33.tex
diff --git a/am33.tex b/am33.tex

index 873f3dd..0486704 100644 (file)
--- a/am33.tex
+++ b/am33.tex
@@ -20,6 +20,12 @@
  
  \definecolor{light}{gray}{0.7}
  
+\setlength{\marginparwidth}{1.2in}
+\let\oldmarginpar\marginpar
+\renewcommand\marginpar[1]{\-\oldmarginpar[\raggedleft\footnotesize #1]%
+{\raggedright\footnotesize #1}}
+
+
  \newcommand{\footnoteremember}[2]{
    \footnote{#2}
    \newcounter{#1}
@@ -31,7 +37,7 @@
  %\pdfpagewidth 8.5in
  %\pdfpageheight 11in 
  %\topmargin 0in
-%\textheight 7.5in
+\textheight 7.9in
  %\textwidth 6.0in
  %\oddsidemargin 0.25in
  %\evensidemargin 0.25in
@@ -46,7 +52,11 @@
  \end{tabular}}
  }
  
-\title{\vspace{-1cm}The FleetTwo Dock}
+\title{\vspace{-1cm}AM33: The FleetTwo Dock
+\\
+{\normalsize
+Adam Megacz
+}}
  
  \begin{document}
  
@@ -56,141 +66,175 @@
  Changes:
  
  \begin{tabular}{rl}
-\color{red}
-07-Apr
-& removed ``+'' from ``potentially torpedoable'' row where it does not occur in Execute  \\
-06-Apr
-& extended {\tt LiteralPath} to 13 bits (impl need not use all of them)  \\
-& update table 3.1.2  \\
-& rename {\tt S} flag to {\tt C}  \\
-\color{black}
-& noted that {\tt setFlags} can be used as {\tt nop} \\
-29-Mar
-& removed the {\tt L} flag (epilogues can now do this) \\
-& removed {\tt take\{Inner|Outer\}LoopCounter} instructions \\
-& renamed {\tt data} instruction to {\tt literal} \\
-& renamed {\tt send} instruction to {\tt move} \\
-23-Mar
-& added ``if its predicate is true'' to repeat count \\
-& added note that red wires do not contact ships \\
-& changed name of {\tt flags} instruction to {\tt setFlags} \\
-& removed black dot from diagrams \\
-& changed {\tt OL} (Outer Loop participant) to {\tt OS} (One Shot) and inverted polarity \\
-& indicated that the death of the {\tt tail} instruction is what causes the hatch to be unsealed \\
-& indicated that only {\tt send} instructions which wait for data are torpedoable \\
-& added section ``Torpedo Details'' \\
-& removed {\tt torpedo} instruction \\
-12-Mar
-\color{black}
-& renamed loop+repeat to outer+inner (not in red) \\
-& renamed {\tt Z} flag to {\tt L} flag (not in red) \\
-& rewrote ``inner and outer loops'' section \\
-& updated all diagrams \\
+27-Aug
+& \color{red} Note that the decision to requeue is based on the value of OLC {\it before} execution\\
+10-Jul
+& Added {\tt OLC=0} predicate \\
+& Eliminated {\tt TAPL} (made possible by previous change) \\
+& Expanded {\tt set} {\tt Immediate} field from 13 bits to 14 bits (made possible by previous change)\\
+09-Jul
+& Fixed a few typos \\
+& Added {\tt DataLatch}\to{\tt TAPL} (Amir's request) \\
+& Eliminated ability to predicate directly on {\tt C}-flag (Ivan's request) \\
+16-Jun
+& When a torpedo strikes, {\tt ILC} is set to {\tt 1} \\
+& Only {\tt move} can be torpedoed (removed {\tt I}-bit from {\tt set}/{\tt shift}) \\
+11-Jun
+& Changed all uses of ``Payload'' to ``Immediate'' \color{black} (not in red) \\
+& Reworked encoding of {\tt set} instruction \\
+\color{black} 
+06-Jun
+& Factored in Russell Kao's comments (thanks!)\\
+& Added mechanism for setting C-flag from fabric even on outboxes\\
+05-Jun
+& Made {\tt OLC} test a predicate-controlled condition\\
+& Rewrote ``on deck'' section \\
+& Added ``{\tt unset}'' value for {\tt ILC}\\
+& Changed {\tt DP} to {\tt DataPredecessor} for clarity\\
  \color{black}
-7-Mar
-& Moved address bits to the LSB-side of a 37-bit instruction \\
-& Added {\it micro-instruction} and {\it composite instruction} terms \\
-& Removed the {\tt DL} field, added {\tt decrement} mode to {\tt loop} \\
-& Created the {\tt Hold} field \\
-& Changed how ReLooping works \\
-& Removed {\tt clog}, {\tt unclog}, {\tt interrupt}, and {\tt massacre} \\
+30-Apr
+& added comment about address-to-path ship \\
+& changed {\tt DST} field of {\tt set} instruction from 2 bits to 3 \\
+& changed the order of instructions in the encoding map \\
+23-Apr
+& added epilogue fifo to diagrams \\
+& indicated that a token sent to the instruction port is treated as a torpedo \\
+%18-Apr
+%& replaced {\tt setInner}, {\tt setOuter}, {\tt setFlags} with unified {\tt set} instruction \\
+%& replaced {\tt literal} with {\tt shift} instruction \\
+%17-Apr
+%& Made all instructions except {\tt setOuter} depend on {\tt OLC>0}  \\
+%& Removed ability to manually set the {\tt C} flag  \\
+%& Expanded predicate field to three bits \\
+%& New literals scheme (via shifting) \\
+%& Instruction encoding changes made at Ivan's request (for layout purposes) \\
+%& Added summary of instruction encodings on last page \\
+%07-Apr
+%& removed ``+'' from ``potentially torpedoable'' row where it does not occur in Execute  \\
+%06-Apr
+%& extended {\tt LiteralPath} to 13 bits (impl need not use all of them)  \\
+%& update table 3.1.2  \\
+%& rename {\tt S} flag to {\tt C}  \\
+%& noted that {\tt setFlags} can be used as {\tt nop} \\
+%29-Mar
+%& removed the {\tt L} flag (epilogues can now do this) \\
+%& removed {\tt take\{Inner|Outer\}LoopCounter} instructions \\
+%& renamed {\tt data} instruction to {\tt literal} \\
+%& renamed {\tt send} instruction to {\tt move} \\
+%23-Mar
+%& added ``if its predicate is true'' to repeat count \\
+%& added note that red wires do not contact ships \\
+%& changed name of {\tt flags} instruction to {\tt setFlags} \\
+%& removed black dot from diagrams \\
+%& changed {\tt OL} (Outer Loop participant) to {\tt OS} (One Shot) and inverted polarity \\
+%& indicated that the death of the {\tt tail} instruction is what causes the hatch to be unsealed \\
+%& indicated that only {\tt send} instructions which wait for data are torpedoable \\
+%& added section ``Torpedo Details'' \\
+%& removed {\tt torpedo} instruction \\
+%12-Mar
+%\color{black}
+%& renamed loop+repeat to outer+inner (not in red) \\
+%& renamed {\tt Z} flag to {\tt L} flag (not in red) \\
+%& rewrote ``inner and outer loops'' section \\
+%& updated all diagrams \\
+%\color{black}
+%7-Mar
+%& Moved address bits to the LSB-side of a 37-bit instruction \\
+%& Added {\it micro-instruction} and {\it composite instruction} terms \\
+%& Removed the {\tt DL} field, added {\tt decrement} mode to {\tt loop} \\
+%& Created the {\tt Hold} field \\
+%& Changed how ReLooping works \\
+%& Removed {\tt clog}, {\tt unclog}, {\tt interrupt}, and {\tt massacre} \\
  \end{tabular}
  \end{abstract}
  
  \vfill
  
  \begin{center}
-\epsfig{file=overview,width=1.5in}
-\epsfig{file=indock,width=3in}
+\epsfig{file=all,height=1.5in}
+\epsfig{file=overview-new,height=1.5in}
  \end{center}
  
  \pagebreak
  
  \section{Overview of Fleet}
  
-A Fleet processor consists of a {\it switch fabric} with several
-functional units called {\it ships} connected to it.  At each
-connection between a ship and the switch fabric lies a programmable
-element known as the {\it dock}.
+A Fleet processor is organized around a {\it switch fabric}, which is
+a packet-switched network with reliable in-order delivery.  The switch
+fabric is used to carry data between different functional units,
+called {\it ships}.  Each ship is connected to the switch fabric by
+one or more programmable elements known as {\it docks}.
  
  A {\it path} specifies a route through the switch fabric from a
  particular {\it source} to a particular {\it destination}.  The
-combination of a path and a single word {\it payload} is called a {\it packet}.  The
-switch fabric carries packets from their sources to their
-destinations.  Each dock has two destinations: one for {\it
+combination of a path and a single word to be delivered is called a
+{\it packet}.  The switch fabric carries packets from their sources to
+their destinations.  Each dock has two destinations: one for {\it
    instructions} and one for {\it data}.  A Fleet is programmed by
-depositing packets into the switch fabric; these packets' paths lead
-them to the instruction destinations of the docks.
+depositing instruction packets into the switch fabric with paths that
+will lead them to instruction destinations of the docks at which they
+are to execute.
  
  When a packet arrives at the instruction destination of a dock, it is
  enqueued for execution.  Before the instruction executes, it may cause
  the dock to wait for a packet to arrive at the dock's data destination
-or for a value to be presented by the ship.  It may present a data
-value to the ship or transmit it for transmission to some other
-destination.
+or for a value to be presented by the ship.  When an instruction
+executes it may consume this data and may present a data value to the
+ship or transmit a packet.
  
  When an instruction sends a packet into the switch fabric, it may
  specify that the payload of the packet is irrelevant.  Such packets
  are known as {\it tokens}, and consume less energy than data packets.
-From a programmer's perspective, a token packet is indistinguishable
-from a data packet with a unknown payload.
  
-In the diagram below, the red wires carry instructions and the blue
-wires carry data; the switch fabric (gray area) carries both.  Notice
-that the red (instruction) wires do not contact the ships.  This is an
-advantage: ships are designed without any consideration for the
-instructions used to program their docks.
  
  \begin{center}
-\epsfig{file=overview,width=2.5in}\\
-{\it Overview of a Fleet processor; gray shading represents a
-  packet-switched network fabric; blue lines carry data, red lines
-  carry instructions.}
+\epsfig{file=overview-new,width=2.5in}\\
+{\it Overview of a Fleet processor; dark gray shading represents the
+  switch fabric, ships are shown in light gray, and docks are shown in blue.}
  \end{center}
  \color{black}
  
  \pagebreak
  
-\section{The FleetTwo Pump}
+\section{The FleetTwo Dock}
  
-The diagram below represents a {\it programmer's} conceptual view of
-the interface between ships and the switch fabric.  Actual
-implementation circuitry may differ substantially.  Sources and
-destinations that can send and receive only tokens -- not data items
--- are drawn as dashed lines.
+The diagram below represents a conceptual view of the interface
+between ships and the switch fabric; actual implementation circuitry
+may differ.
  
  \begin{center}
-\epsfig{file=indock,width=3.5in}\\
-{\it an ``input'' dock}
-
-\epsfig{file=outdock,width=3.5in}\\
-{\it an ``output'' dock}
+\epsfig{file=all,width=3.5in}\\
+{\it An ``input'' dock and ``output'' dock connected to a ship.  Solid
+  blue lines carry either tokens or data words, red lines carry either
+  instructions or torpedoes, and dashed lines carry only tokens.}
  \end{center}
  
-The term {\it port} refers to an interface to the ship, the {\it
-  dock} connecting it to the switch fabric, and the corresponding
-sources and destinations on the switch fabric.
-
  Each dock consists of a {\it data latch}, which is as wide as a single
  machine word and a {\it pump}, which is a circular fifo of
  instruction-width latches.  The values in the pump control the data
-latch.
+latch.  The dock also includes a {\it path latch}, which
+stores the path along which outgoing packets will be sent.\color{black}
  
  Note that the pump in each dock has a destination of its own; this is
  the {\it instruction destination} mentioned in the previous section.
-Note that unlike all other destinations, there is no buffering fifo
-guarding this one.  The size of these fifos are exposed to the
-software programmer so he can avoid deadlock.
+
+From any source to any dock's data destination there are
+two distinct paths which differ by a single bit.  This bit is known as
+the ``signal'' bit, and the routing of a packet is not affected by it;
+the signal bit is used to pass control values between docks.  Note that paths
+terminating at an {\it instruction} destination need not have a signal
+bit.  \color{black}
  
  \pagebreak
  \section{Instructions}
  
  In order to cause an instruction to execute, the programmer must first
-cause that instruction word to arrive in the data latch of some output
-dock.  For example, this might be the ``data read'' output dock of the
-memory access ship or the output of a fifo ship.  Once an instruction
-has arrived at this output dock, it is {\it dispatched} by sending it
-to the {\it instruction port} of the dock at which it is to execute.
+arrange for that instruction word to arrive in the data latch of some
+output dock.  For example, this might be the ``data read'' output dock
+of the memory access ship or the output of a fifo ship.  Once an
+instruction has arrived at this output dock, it is {\it dispatched} by
+sending it to the {\it instruction port} of the dock at which it is to
+execute.
  
  Each instruction is 26 bits long, which makes it possible for an
  instruction and an 11-bit path to fit in a single word of memory.
@@ -205,191 +249,115 @@ This path is the path from the {\it dispatching} dock to the {\it
    \bitbox{11}{dispatch path} 
  \end{bytefield}}
  
-{\bf Note:} the instruction encodings below are simply ``something to
-shoot at'' and a sanity check to make sure we haven't overrun our bit
-budget.  The final instruction encodings will probably be
-different.
-
-All instruction words have the following format:
-
-\setlength{\bitwidth}{3.5mm}
-{\tt \footnotesize
-\begin{bytefield}{37}
-  \bitheader[b]{0,10,11,36}\\
-\color{black}
-  \bitbox{1}{I} 
-  \bitbox{1}{OS}
-  \bitbox{2}{P} 
-\color{light}
-  \bitbox[tbr]{22}{} 
-  \bitbox{11}{dispatch path} 
-\color{black}
-\end{bytefield}}
-
-Each instruction word is called a {\it micro instruction}.
-Collections of one or more micro instruction are known as {\it
-  composite instructions}.
  
-The {\tt I} bit stands for {\tt Interruptible}.  The {\tt OS} (``One
-Shot'') bit indicates whether or not this instruction is part of an
-outer loop.  Both of the preceding bits are explained in the next
-section.
-
-\color{black}
  
-The abbreviation {\tt P} stands for {\it predicate}; this is a two-bit
-code that indicates if the instruction should be executed or ignored.
  
-
-
-\pagebreak
  \subsection{Life Cycle of an Instruction}
  
-The diagram below shows an input dock for purposes of illustration
-(behavior at an output dock is identical).
+The diagram below shows an input dock for purposes of illustration:
  
  \begin{center}
-\epsfig{file=indock,width=3in}\\
+\epsfig{file=in,width=4in}\\
  {\it an input dock}
  \end{center}
  
-Note the circle on the path between ``instr horn'' and ``instr fifo'';
-this is known as ``the hatch''.  The hatch has two states: sealed and
-unsealed.  When the machine powers up, the hatch is unsealed; it is
-sealed by the {\tt tail} instruction and unsealed whenever the outer
-loop counter is set to zero (for any reason\footnote{this
-  includes {\tt OLC} being decremented to zero, a {\tt setOuter} with
-  a literal field of zero, a {\tt setOuter} which copies a zero from
-  the data register to {\tt OLC}, or the occurrence of a
-  torpedo}).
-
-When an instruction arrives at the instruction horn, it waits there
-until the hatch is in the unsealed state.  The instruction then enters
-the instruction fifo.  When an instruction emerges from the
-instruction fifo, it arrives at the ``on deck'' stage, where it may
-execute.
-
-\subsubsection{Inner and Outer Loops}
-
-A programmer can perform two types of loops: {\it inner} loops of only
-one micro-instruction and {\it outer} loops of multiple
-micro-instructions.  Inner loops may be nested within an outer loop,
-but no other nesting of loops is allowed.  The paths used by inner
-loops and outer loops are shown below:
+Note the mux on the path between {\tt EF} (epilogue fifo) and {\tt IF}
+(instruction fifo); this is known as ``the hatch''.  The hatch has two
+states: sealed and unsealed.  When the machine powers up, the hatch is
+unsealed; it is sealed by the {\tt tail} instruction and unsealed
+whenever the outer loop counter is set to zero (for any
+reason\footnote{this includes {\tt OLC} being decremented to zero, a
+  {\tt set} instruction which writes zero to {\tt OLC}, or the occurrence
+  of a torpedo}).
+
+When an instruction arrives at the epilogue fifo ({\tt EF}), it waits
+there until the hatch is in the unsealed state; the instruction then
+enters the instruction fifo.  When an instruction emerges from the
+instruction fifo, it arrives at the ``on deck'' ({\tt OD}) stage,
+where it may execute.
  
  \begin{center}
-\begin{minipage}{2in}
-\begin{center}
-\epsfig{file=inner-loop,width=2in}\\
-{\it inner loop (in red)}
-\end{center}
-\end{minipage}
-\begin{minipage}{2in}
-\begin{center}
-\epsfig{file=outer-loop,width=2in}\\
-{\it outer loop (in red)}
-\end{center}
-\end{minipage}
+\epsfig{file=out,width=4in}\\
+{\it an output dock}
  \end{center}
  
-Each type of loop has a counter associated with it: the {\tt ILC}
-counter for inner loops and the {\tt OLC} counter for outer loops.
-The inner loop counter applies only to certain ``inner-looping''
-instructions (see the table below for details).  When such an
-instruction reaches On Deck, if its predicate is true it will execute
-a number of times equal to {\tt ILC+1}, and leave {\tt ILC=0} after
-executing.  Non-inner-looping instructions and instructions whose
-predicate is false do not decrement {\tt ILC}.
+\subsubsection{Torpedoes}
  
-The outer loop counter applies to all instructions {\it except} the
-instruction {\tt setOuter} with {\tt OS=1}, because such instructions
-are needed to reset the outer loop counter after it becomes zero.
-However, predicated {\tt setOuter} with {\tt OS=0} is useful for
-resetting the loop counter in the middle of the execution of a loop.
+A token sent to an instruction destination is called a {\it torpedo}.
+When a torpedo arrives at the tail of {\tt EF}, it is deposited in a
+waiting area (not shown) rather than being enqueued into {\tt EF}.
  
-\subsubsection{On Deck}
+\subsection{Format of an Instruction}
  
-The table below lists the actions which may be taken when an
-instruction arrives on deck:
+All instruction words have the following format:
  
-\color{red}
-\begin{center}
-\def\side#1{\begin{sideways}\parbox{15mm}{#1}\end{sideways}}
-\begin{tabular}{|r|ccccc|cccccc|}\hline
-%&\multicolumn{10}{c}{Predicate}&\\
-%&\multicolumn{10}{c}{True}&\\\hline
-&\multicolumn{5}{c}{Outer-Looping} &\multicolumn{5}{c}{One-Shot}&\\
-&\multicolumn{5}{c}{{\tt (OS=0)}} &\multicolumn{5}{c}{{\tt (OS=1)}}&\\
-&\side{{\tt move}}
-&\side{{\tt literal}}
-&\side{{\tt setFlags}}
-&\side{{\tt setInner}}
-&\side{{\tt setOuter}}
-&\side{{\tt move}}
-&\side{{\tt literal}}
-&\side{{\tt setFlags}}
-&\side{{\tt setInner}}
-&\side{{\tt setOuter}}
-&
-\\\hline
-Wait for hatch sealed,        & +   & +   & +   & +   & +   & -   & -   & -   & -   & -   &  \\
-then IF0 w/ copy of self      &     &     &     &     &     &     &     &     &     &     &  \\\hline
-Potentially torpedoable       & P+I & P+I & P+I & P+I & P+I & PI  & PI  & PI  & PI  & PI  & \\
-Execute                       & P+  & P+  & P+  & P+  & P+  & P   & P   & P   & P   & P   &  \\
-Inner-looping                 & P+  & -   & -   & -   & -   & P   & -   & -   & -   & -   &  \\
-\hline
-\end{tabular}
+\newcommand{\bitsHeader}{
+  \bitbox{1}{I} 
+  \bitbox{1}{OS}
+  \bitbox{3}{P} 
+}
+\newcommand{\bitsHeaderNoI}{
+  \bitbox{1}{} 
+  \bitbox{1}{OS}
+  \bitbox{3}{P} 
+}
  
-\begin{tabular}{|r|l|}\hline
-+       & Only if {\tt OLC>0} (ie {\tt OLC} is positive) \\
-P       & Only if predicate is true \\
-P+      & Only if predicate is true and {\tt OLC>0} \\
-PI      & Only if predicate is true and {\tt I=1}. \\
-P+I     & Only if predicate is true and {\tt OLC>0} and {\tt I=1}. \\\hline
-\end{tabular}
-\end{center}
+\setlength{\bitwidth}{3.5mm}
+{\tt \footnotesize
+\begin{bytefield}{37}
+  \bitheader[b]{0,10,11,31,32,34-36}\\
  \color{black}
-
-\color{red}
-{\bf Note:} a non-one-shot instruction may {\it execute} before the
-hatch is sealed, but may not {\it fill IF0} before the hatch is
-sealed.  The instruction will not vacate On Deck until both of these
-tasks are complete, so the second non-one-shot instruction in a loop
-will not execute until the hatch is sealed, {\it but the first
-  instruction will}.
+  \bitsHeader
+\color{light}
+  \bitbox[tbr]{21}{} 
+  \bitbox{11}{dispatch path} 
  \color{black}
+\end{bytefield}}
  
-\subsubsection{Torpedo}
-
-There is a small fifo (not shown) before the latch marked
-``Instruction Horn''; after the {\tt tail} instruction seals the
-hatch, any subsequent instructions will queue up in this fifo until
-the hatch is unsealed.  This is typically used as storage for a ``loop
-epilogue'' -- a sequence of instructions to be executed after a
-torpedo arrives or the outer loop counter expires.
-
-Each dock has a fourth connection to the switch fabric (not shown),
-called its {\it torpedo destination}.  Anything (even a token) sent to
-this destination is treated as a torpedo.  Note that because this is a
-distinct destination, instructions or data queued up in the other
-destination fifos will not prevent a torpedo from occuring.
-
-When a data item or token arrives at the torpedo destination, it lies
-there in wait until On Deck holds a potentially torpedoable
-instruction (see previous table).  Once this is the case, the torpedo
-causes the inner and outer loop counters to be set to zero (and
-therefore also unseals the hatch).\footnote{it is unspecified whether
-  the torpedoed instruction is requeued or not; this may or may not
-  occur, nondeterministically.  It is the programmer's responsibility
-  to ensure that the program behaves the same whether this happens or
-  not.  We think that this will not matter in most situations.}
+\begin{itemize}
+\item The {\tt I} bit stands for {\tt Interruptible}, and indicates if an
+instruction is vulnerable to torpedoes.  This bit only appears in {\tt move} instructions.
  
-\color{black}
+\item The {\tt OS} (``One Shot'') bit indicates whether or not this
+  instruction can pass through the pump more than once.  If set to
+  {\tt 1}, then the instruction is a ``one-shot'' instruction, and
+  does not pass through the instruction fifo more than once.
+
+\item  The {\tt P} bits are a {\it predicate}; this
+holds a code which indicates if the instruction should be executed or
+ignored depending on the state of flags in the dock.
+\end{itemize}
  
+\pagebreak
+\subsection{Loop Counters}
  
-\subsection{Flags}
+A programmer can perform two types of loops: {\it inner} loops of only
+one instruction and {\it outer} loops of multiple instructions.  Inner
+loops may be nested within an outer loop, but no other nesting of
+loops is allowed.
  
-The pump has three flags: {\tt A}, {\tt B}, and {\color{red}{\tt C}\color{black}\ }.
+The dock has two loop counters, one for each kind of loop:
+
+\begin{itemize}
+\item {\tt OLC} is the Outer Loop Counter
+\item {\tt ILC} is the Inner Loop Counter
+\end{itemize}
+
+The {\tt OLC} applies to all instructions and can hold integers {\tt
+  0..MAX\_OLC}.
+
+The {\tt ILC} applies only to {\tt move} instructions and can hold
+integers {\tt 0..MAX\_ILC} as well as a special value: $\infty$.  When
+{\tt ILC=0} the next {\tt move} instruction executes zero times (i.e., is
+ignored).  When {\tt ILC=$\infty$} the next {\tt move} instruction
+executes until interrupted by a torpedo.  After every {\tt move}
+instruction the {\tt ILC} is reset to {\tt 1} (note that it is reset
+to {\tt 1}, {\it not to 0}).
+
+\color{black}
+\subsection{Flags and Predication}
+
+The pump has three flags: {\tt A}, {\tt B}, and {\tt C}.
  
  \begin{itemize}
  \item The {\tt A} and {\tt B} flags are general-purpose flags which
@@ -406,58 +374,99 @@ The pump has three flags: {\tt A}, {\tt B}, and {\color{red}{\tt C}\color{black}
  %      operations (such as sending a completion token) only on the last
  %      iteration of an outer loop.
  
-\item The {\color{red}{\tt C}\color{black}\ } flag, known as the {\it control} flag.  
-      \color{red}At outboxes its value is determined by the ship; at
-      inboxes its value is copied from an unused address bit in the
-      destination to which the received value was sent.
-      \color{black}
-\end{itemize}
-
-Many instruction fields are specified as two-bit {\it predicates}.
-These fields contain one of four values, indicating if an action
-should be taken unconditionally or conditionally on one of the {\tt A}
-or {\tt B} flags:
+\item The {\tt C} flag is known as the {\it control} flag, and may be
+      set by the {\tt move} instruction based on information from the
+      ship or from an inbound packet.  See the {\tt move} instruction
+      for further details.
+\color{black}
  
-\begin{itemize}
-\item {\tt 00:} if {\tt A} is set
-\item {\tt 10:} if {\tt B} is set
-\item {\tt 01:} \color{red}if {\color{red}{\tt C}\color{black}\ } is set\color{black}
-\item {\tt 11:} always
  \end{itemize}
  
+The {\tt P} field specifies a three-bit {\it predicate}.  The
+predicate determines which conditions must be true in order for the
+instruction to execute; if it is not executed, it is simply {\it
+  ignored}.  The table below shows what conditions must be true in
+order for an instruction to execute:
+
+\begin{center}
+\begin{tabular}{|r|ll|}\hline
+Code       & Execute & if \\\hline
+{\tt 000:} & {\tt OLC$\neq$0} & and {\tt A=0} \\
+{\tt 001:} & {\tt OLC$\neq$0} & and {\tt A=1} \\
+{\tt 010:} & {\tt OLC$\neq$0} & and {\tt B=0} \\
+{\tt 011:} & {\tt OLC$\neq$0} & and {\tt B=1} \\
+{\tt 100:} & Unused & \\
+{\tt 101:} & {\tt OLC=0} & \\
+{\tt 110:} & {\tt OLC$\neq$0} & \\
+{\tt 111:} & always & \\
+\hline\end{tabular}
+\end{center}
  
  \pagebreak
-\section{Instructions}
+\subsection{On Deck}
  
-Here is a list of the instructions supported by the dock:
+When an instruction arrives on deck, two concurrent processes are
+started.  No subsequent instruction may come on deck until both
+processes have completed:
+
+\begin{enumerate}
+
+\item Requeueing:
+      \begin{itemize}
+      \item If the outer loop counter is zero ({\tt OLC=0})
+            \color{red}{\it before executing the
+            instruction}\color{black}\ or the instruction on deck is a
+            one-shot instruction ({\tt OS=1}), do nothing.
+      \item {\it Otherwise} wait for the hatch to be sealed and
+            enqueue a copy of the instruction currently on deck.
+      \end{itemize}
+
+\item Execution:
+
+      \begin{itemize}
+      \item
+      If the instruction's predicate condition is not met (see
+      section on predicates), do nothing.
+
+      \item
+      {\it Otherwise} if the instruction is interruptible ({\tt I=1})
+      and a torpedo is present in the waiting area: consume the
+      torpedo, set the outer loop counter to zero ({\tt OLC=0}),
+      set the inner loop counter to one ({\tt ILC=1}), and
+      unseal the hatch.
+
+      \item
+      {\it Otherwise} if {\tt ILC$\neq$0} or the instruction is {\it
+      not} a {\tt move}: execute the instruction.
+      \end{itemize}
+\end{enumerate}
  
-\begin{center}
-\begin{tabular}{|l|}\hline
-{\tt move} (variants: {\tt moveto}, {\tt dispatch}) \\
-{\tt literal} (variants: {\tt literalhi}, {\tt literallo})\\
-{\tt setFlags} \\
-{\tt setInner} \\
-{\tt setOuter} \\
-%{\tt torpedo} \\
-{\tt tail} \\\hline
-\end{tabular}
-\end{center}
  
  \color{black}
  
  
+\pagebreak
+\section{Instructions}
+
+The dock supports four instructions:
+{\tt move} (variants: {\tt moveto}, {\tt dispatch}),
+{\tt shift},
+{\tt set}, and
+{\tt tail}.
+\color{black}
+
+
  \subsection{{\tt move} (variants: {\tt moveto}, {\tt dispatch})}
  
-\setlength{\bitwidth}{5mm}
+\newcommand{\bitsMove}{\setlength{\bitwidth}{5mm}
  {\tt
  \begin{bytefield}{26}
-  \bitheader[b]{14-19,21}\\
+  \bitheader[b]{14-20}\\
  \color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS}
-  \bitbox{2}{P}
+  \bitsHeader
  \color{black}
-   \bitbox{3}{001} 
+  \bitbox{1}{0} 
+  \bitbox{1}{1} 
    \bitbox{1}{\tt Ti}
    \bitbox{1}{\tt Di}
    \bitbox{1}{\tt Dc}
@@ -466,35 +475,17 @@ Here is a list of the instructions supported by the dock:
    \bitbox[l]{19}{}
  \end{bytefield}}
  
-%\begin{bytefield}{26}
-%  \bitheader[b]{12-18}\\
-%  \bitbox[]{8}{\raggedleft Input Dock:}
-%  \bitbox[r]{2}{}
-%  \bitbox{1}{\tt So} 
-%  \bitbox{1}{\tt Dc}
-%  \bitbox[l]{15}{}
-%\end{bytefield}
-%
-%\begin{bytefield}{26}
-%  \bitheader[b]{12-18}\\
-%  \bitbox[]{8}{\raggedleft Output Dock:}
-%  \bitbox[r]{2}{}
-%  \bitbox{1}{\tt Si}
-%  \bitbox{1}{\tt To}
-%  \bitbox[l]{15}{}
-%\end{bytefield}
-
  \begin{bytefield}{26}
    \bitheader[b]{0,12,13}\\
-  \bitbox[1]{11}{\raggedleft {\tt moveto} ({\tt LiteralPath\to Path})}
+  \bitbox[1]{11}{\raggedleft {\tt moveto} ({\tt Immediate\to Path})}
    \bitbox[r]{1}{}
    \bitbox{1}{\tt 1}
-  \bitbox{13}{\tt LiteralPath}
+  \bitbox{13}{\tt Immediate}
  \end{bytefield}
  
  \begin{bytefield}{26}
-  \bitheader[b]{12,13}\\
-  \bitbox[1]{11}{\raggedleft {\tt dispatch} ({\tt DP[37:25]\to Path})\ \ }
+  \bitheader[b]{11,12,13}\\
+  \bitbox[1]{11}{\raggedleft {\tt dispatch} ({\footnotesize {\tt DataPredecessor[37:25]\to Path}})\ \ }
    \bitbox[r]{1}{}
    \bitbox{1}{\tt 0}
    \bitbox{1}{\tt 1}
@@ -504,7 +495,7 @@ Here is a list of the instructions supported by the dock:
  \end{bytefield}
  
  \begin{bytefield}{26}
-  \bitheader[b]{12,13}\\
+  \bitheader[b]{11,12,13}\\
    \bitbox[1]{11}{\raggedleft {\tt move} ({\tt Path} unchanged):}
    \bitbox[r]{1}{}
    \bitbox{1}{\tt 0}
@@ -512,7 +503,8 @@ Here is a list of the instructions supported by the dock:
  \color{light}
    \bitbox[trb]{12}{}
  \color{black}
-\end{bytefield}
+\end{bytefield}}
+\bitsMove
  
  \begin{itemize}
  \item {\tt Ti} - Token Input: wait for the token predecessor to be full and drain it.
@@ -525,104 +517,145 @@ Here is a list of the instructions supported by the dock:
  The data successor and token successor must both be empty in order for
  a {\tt move} instruction to attempt execution.
  
-The inner loop counter can hold a number {\tt 0..MAX} or a special
-value $\infty$.  If {\tt ILC} is nonzero after execution of a {\tt
-  move} instruction, the instruction will execute again, and {\tt ILC}
-will be latched with {\tt (ILC==$\infty$?$\infty$:max(ILC-1, 0))}.  When
-the inner loop counter reaches zero, the instruction ceases executing.
+Each time a {\tt move} instruction executes, the {\tt C} flag may be
+updated as follows:
  
+\begin{itemize}
+\item At an {\it input} dock the {\tt C} flag is set to the signal bit
+      of the incoming packet if {\tt Di} or {\tt Ti} is set.
+
+\item At an {\it output} dock the {\tt C} flag is set to a value
+      provided by the ship if the {\tt Di} bit is set, and to the
+      signal bit of the incoming packet if {\tt Di} is clear and {\tt
+      Ti} is set.
+\end{itemize}
+
+\color{black}
  
  \pagebreak
-\subsection{{\tt literal}, {\tt literalhi}, {\tt literallo}}
  
-These instructions load part or all of the data latch ({\tt D}).
+\subsection{{\tt set}}
  
-{\tt literalhi: Literal[18:1]\to D[37:20]} (and {\tt Literal[18]\to S})
+The {\tt set} instruction is used to load the inner loop counter, outer
+loop counter, data latch, or flags, or to decrement the outer loop counter.
  
+\newcommand{\bitsSet}{
  \setlength{\bitwidth}{5mm}
  {\tt
  \begin{bytefield}{26}
-  \bitheader[b]{0,18,19,21}\\
-\color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS} 
-  \bitbox{2}{P}
-\color{black}
+  \bitheader[b]{19-25}\\
+  \bitsHeaderNoI
+  \bitbox{1}{1}
    \bitbox{1}{0} 
-  \bitbox{2}{11} 
  \color{light}
-  \bitbox[trb]{1}{} 
+  \bitbox{5}{Dest} 
+  \bitbox{14}{} 
  \color{black}
-  \bitbox{18}{Literal} 
  \end{bytefield}}
  
-{\tt literallo: Literal[19:1]\to D[19:1]}
-
-\setlength{\bitwidth}{5mm}
-{\tt
  \begin{bytefield}{26}
-  \bitheader[b]{0,18,19,21}\\
-\color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS} 
-  \bitbox{2}{P}
-\color{black}
-  \bitbox{1}{0} 
-  \bitbox{2}{10} 
-  \bitbox{19}{Literal} 
-\end{bytefield}}
+  \bitheader[b]{0,5,12-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt Immediate}\to{\tt OLC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 1000\color{black}}
+  \bitbox{3}{\tt 100}
+  \bitbox{6}{}
+  \bitbox{6}{\tt Immediate}
+\end{bytefield}
  
-{\tt literal:}
+\begin{bytefield}{26}
+  \bitheader[b]{12-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt Data Latch}\to{\tt OLC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 1000\color{black}}
+  \bitbox{3}{\tt 010}
+  \bitbox{12}{}
+\end{bytefield}
  
-\setlength{\bitwidth}{5mm}
-{\tt
  \begin{bytefield}{26}
-  \bitheader[b]{0,18,19,21}\\
-\color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS} 
-  \bitbox{2}{P}
-\color{black}
-  \bitbox{1}{1} 
-  \bitbox{2}{SEL} 
-  \bitbox{19}{Literal} 
-\end{bytefield}}
+  \bitheader[b]{12-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt OLC-1}\to{\tt OLC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 1000\color{black}}
+  \bitbox{3}{\tt 001}
+  \bitbox{12}{}
+\end{bytefield}
  
-{\tt
-\begin{tabular}{|r|c|c|c|}\hline
-sel  & D[37:20]      & D[19:1]       \\\hline
-00  & Literal[18:1] & all 0         \\
-01  & Literal[18:1] & all 1         \\
-10  & all 0         & Literal[19:1] \\
-11  & all 1         & Literal[19:1] \\
-\hline
-\end{tabular}}
+\begin{bytefield}{26}
+  \bitheader[b]{0,5,6,12-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt Immediate}\to{\tt ILC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0100\color{black}}
+  \bitbox{3}{\tt 100}
+  \bitbox{5}{}
+  \bitbox{1}{\tt 0}
+  \bitbox{6}{\tt Immediate}
+\end{bytefield}
  
+\begin{bytefield}{26}
+  \bitheader[b]{6,12-18}\\
+  \bitbox[1]{6}{\raggedleft $\infty$\to{\tt ILC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0100\color{black}}
+  \bitbox{3}{\tt 100}
+  \bitbox{5}{}
+  \bitbox{1}{\tt 1}
+  \bitbox{6}{}
+\end{bytefield}
  
+\begin{bytefield}{26}
+  \bitheader[b]{12-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt Data Latch}\to{\tt ILC}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0100\color{black}}
+  \bitbox{3}{\tt 010}
+  \bitbox{12}{}
+\end{bytefield}
  
+\begin{bytefield}{26}
+  \bitheader[b]{0,13-18}\\
+  \bitbox[1]{6}{\raggedleft \footnotesize {\tt 0-Extended Immediate}\to{\tt Data Latch}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0010\color{black}}
+  \bitbox{1}{\tt 0}
+  \bitbox{14}{\tt Immediate}
+\end{bytefield}
  
-\subsection{{\tt setFlags}}
+\begin{bytefield}{26}
+  \bitheader[b]{0,13-18}\\
+  \bitbox[1]{6}{\raggedleft \footnotesize {\tt 1-Extended Immediate}\to{\tt Data Latch}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0010\color{black}}
+  \bitbox{1}{\tt 1}
+  \bitbox{14}{\tt Immediate}
+\end{bytefield}
  
-\setlength{\bitwidth}{5mm}
-{\tt
  \begin{bytefield}{26}
-  \bitheader[b]{0,5,6,11,12,17-19,21}\\
-\color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS} 
-  \bitbox{2}{P}
+  \bitheader[b]{0,5,6,11,15-18}\\
+  \bitbox[1]{6}{\raggedleft {\tt Update Flags}}
+  \bitbox[r]{1}{}
+  \bitbox{4}{\tt 0001\color{black}}
+  \bitbox{3}{}
+  \bitbox{6}{\tt nextA}
+  \bitbox{6}{\tt nextB}
+\end{bytefield}
+
+
  \color{black}
-  \bitbox{3}{000}
-  \bitbox{1}{0}
-  \bitbox{6}{nextA}
-  \bitbox{6}{nextB}
-  \bitbox{6}{nextC}
-\end{bytefield}}
  
-The {\tt P} field is a predicate; if it does not hold, the instruction
-is ignored.  Otherwise the flags are updated according to the {\tt
-  nextA}, {\tt nextB}, and {\tt nextC} fields; each specifies the new
-value as the logical {\tt OR} of zero or more inputs:
+}
+\bitsSet
+
+The FleetTwo implementation is likely to have an unarchitected
+``literal latch'' at the on deck ({\tt OD}) stage, which is loaded
+with the possibly-extended literal {\it at the time that the {\tt set}
+  instruction comes on deck}.  This latch is then copied into the data
+latch when a {\tt set Data Latch} instruction
+executes.
+
+Each of the {\tt nextA} and {\tt nextB} fields has the following
+structure, and indicates which old flag values should be logically
+{\tt OR}ed together to produce the new flag value:
  
  \begin{center}
  {\tt
@@ -632,8 +665,8 @@ value as the logical {\tt OR} of zero or more inputs:
    \bitbox{1}{$\overline{\text{\tt A}}$}
    \bitbox{1}{${\text{\tt B}}$}
    \bitbox{1}{$\overline{\text{\tt B}}$}
-  \bitbox{1}{${\text{\color{red}{\tt C}\color{black}\ }}$}
-  \bitbox{1}{$\overline{\text{\color{red}{\tt C}\color{black}\ }}$}
+  \bitbox{1}{${\text{{\tt C}\ }}$}
+  \bitbox{1}{$\overline{\text{{\tt C}\ }}$}
  \end{bytefield}}
  \end{center}
  
@@ -641,151 +674,67 @@ Each bit corresponds to one possible input; all inputs whose bits are
  set are {\tt OR}ed together, and the resulting value is assigned to
  the flag.  Note that if none of the bits are set, the value assigned
  is zero.  Note also that it is possible to produce a {\tt 1} by {\tt
-  OR}ing any flag with its complement.  Note that {\tt setFlags} can
+  OR}ing any flag with its complement, and that {\tt set Flags} can
  be used to create a {\tt nop} (no-op) by setting each flag to itself.
  
  
-
-\pagebreak
-
-\subsection{{\tt setInner}}
-
-This instruction loads the inner loop counter with either a literal
-number, the special value $\infty$, or the contents of the {\tt data}
-register.
-
-\setlength{\bitwidth}{5mm}
-{\tt
-\begin{bytefield}{26}
-  \bitheader[b]{16-19,21}\\
-\color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS} 
-  \bitbox{2}{P}
-\color{black}
-  \bitbox{3}{000}
-  \bitbox{1}{1}
-  \bitbox{2}{01}
-\color{light}
-  \bitbox[tbr]{8}{}
-  \bitbox[l]{8}{}
-\color{black}
-\end{bytefield}}\\
-
-\begin{bytefield}{26}
-  \bitbox[r]{18}{\raggedleft from data latch:\hspace{0.2cm}\ }
-  \bitbox{2}{\tt 00}
-\color{light}
-  \bitbox[tbr]{6}{} 
-\color{black}
-\end{bytefield}
-
-\begin{bytefield}{26}
-  \bitheader[b]{0,5,6,7}\\
-  \bitbox[r]{18}{\raggedleft from literal:\hspace{0.2cm}\ }
-  \bitbox{2}{\tt 10}
-  \bitbox{6}{\tt Literal} 
-\end{bytefield}
-
-\begin{bytefield}{26}
-  \bitheader[b]{0,5,6,7}\\
-  \bitbox[r]{18}{\raggedleft with $\infty$\ \ }
-  \bitbox{2}{\tt 11} 
-\color{light}
-  \bitbox[tbr]{6}{} 
  \color{black}
-\end{bytefield}
  
+\pagebreak
+\subsection{{\tt shift}}
  
-\subsection{{\tt setOuter}}
+\newcommand{\shiftImmediateSize}{19}
  
-This instruction loads the outer loop counter {\tt OLC} with either
-{\tt max(0,OLC-1)}, a literal or the contents of the {\tt data}
-register.
+Each {\tt shift} instruction carries an immediate of \shiftImmediateSize\ 
+bits.  When a {\tt shift} instruction is executed, this immediate is copied
+into the least significant \shiftImmediateSize\  bits of the data latch,
+and the remaining most significant bits of the data latch are loaded
+with the value formerly in the least significant bits of the data latch.
+In this manner, large literals can be built up by ``shifting'' them
+into the data latch \shiftImmediateSize\ bits at a time.
  
+\newcommand{\bitsShift}{
  \setlength{\bitwidth}{5mm}
  {\tt
  \begin{bytefield}{26}
-  \bitheader[b]{16-19,21,24}\\
+  \bitheader[b]{0,18-20}\\
  \color{light}
-  \bitbox{1}{I}
-  \bitbox{1}{OS}
-\color{light}
-  \bitbox[tbr]{2}{P}
+  \bitsHeaderNoI
  \color{black}
-  \bitbox{3}{000}
-  \bitbox{1}{1}
-  \bitbox{2}{10}
-\color{light}
-  \bitbox[tbr]{9}{} 
-  \bitbox[l]{7}{}
+  \bitbox{1}{0} 
+  \bitbox{1}{0} 
  \color{black}
-\end{bytefield}}\\
+  \bitbox{\shiftImmediateSize}{Immediate} 
+\end{bytefield}}
+}
+\bitsShift
  
-\begin{bytefield}{26}
-  \bitbox[r]{19}{\raggedleft {\tt max(0,OLC-1)}:\hspace{0.2cm}\ }
-  \bitbox{2}{\tt 00} 
-%\color{light}
-  \bitbox[tbr]{5}{} 
-%\color{black}
-\color{black}
-\end{bytefield}
+The FleetTwo implementation is likely to have an unarchitected
+``literal latch'' at the on deck ({\tt OD}) stage, which is loaded
+with the literal {\it at the time that the {\tt shift} instruction
+  comes on deck}.  This latch is then copied into the data latch when
+the instruction executes.
  
-\begin{bytefield}{26}
-  \bitbox[r]{19}{\raggedleft from data latch:\hspace{0.2cm}\ }
-  \bitbox{2}{\tt 01} 
-\color{light}
-  \bitbox[tbr]{5}{} 
  \color{black}
-\end{bytefield}
-
-\begin{bytefield}{26}
-  \bitheader[b]{0,5,6}\\
-  \bitbox[r]{19}{\raggedleft from literal:\hspace{0.2cm}\ }
-  \bitbox{1}{\tt 1} 
-  \bitbox{6}{\tt Literal} 
-\end{bytefield}
-
-
-%\subsection{{\tt torpedo}}
-%
-%\setlength{\bitwidth}{5mm}
-%{\tt
-%\begin{bytefield}{26}
-%  \bitheader[b]{0,5,16-19,21}\\
-%\color{light}
-%  \bitbox{4}{} 
-%\color{black}
-%  \bitbox{3}{000} 
-%  \bitbox{1}{1}
-%  \bitbox{2}{00}
-%\color{light}
-%  \bitbox[tbr]{16}{} 
-%\end{bytefield}}
-%
-%
-%When a {\tt torpedo} instruction reaches the instruction horn, it will
-%wait there until an instruction is on deck whose {\tt A}rmor bit is
-%not set.  The {\tt torpedo} will then cause ``Process \#2'' of the on
-%deck instruction to terminate and will set the outer loop counter to zero.
  
  \subsection{{\tt tail}}
  
+\newcommand{\bitsTail}{
  \setlength{\bitwidth}{5mm}
  {\tt
  \begin{bytefield}{26}
-  \bitheader[b]{0,5,16-19,21}\\
+  \bitheader[b]{19-20}\\
  \color{light}
-  \bitbox{4}{} 
+  \bitbox{5}{} 
  \color{black}
-  \bitbox{3}{000} 
    \bitbox{1}{1}
-  \bitbox{2}{01}
+  \bitbox{1}{1}
  \color{light}
-  \bitbox[tbr]{16}{} 
-\end{bytefield}}
+  \bitbox[tbr]{19}{} 
+\end{bytefield}}}
+\bitsTail
  
-When a {\tt tail} instruction reaches {\tt IH}, it seals the hatch.
+When a {\tt tail} instruction reaches the hatch, it seals the hatch.
  The {\tt tail} instruction does not enter the instruction fifo.
  
  \color{black}
@@ -936,15 +885,33 @@ The {\tt tail} instruction does not enter the instruction fifo.
  
  
  \pagebreak
-\epsfig{file=overview,height=5in,angle=90}
+\section*{Instruction Encoding Map}
+
+\hspace{-1cm}{\tt shift}\\
+\bitsShift
+
+\hspace{-1cm}{\tt set}\\
+\bitsSet
+
+\hspace{-1cm}{\tt move}\\
+\bitsMove
+
+\hspace{-1cm}{\tt tail}\\
+\bitsTail
+
+
+\color{black}
+
+\pagebreak
+\epsfig{file=all,height=5in,angle=90}
  
  \pagebreak
  \subsection*{Input Dock}
-\epsfig{file=indock,width=7in,angle=90}
+\epsfig{file=in,width=8in,angle=90}
  
  \pagebreak
  \subsection*{Output Dock}
-\epsfig{file=outdock,width=6.5in,angle=90}
+\epsfig{file=out,width=8in,angle=90}
  
  
  %\pagebreak