+\documentclass[10pt]{article}
+\usepackage{palatino}
+\usepackage{amsmath}
+\usepackage{epsfig}
+\usepackage{color}
+\usepackage{bytefield1}
+\usepackage{wrapfig}
+\usepackage{stmaryrd}
+\usepackage{subfigure}
+\usepackage{syntax}
+\usepackage{comment}
+\usepackage{fancyhdr}
+\usepackage{lastpage}
+\usepackage{multirow}
+\usepackage{multicol}
+\usepackage{rotating}
+\input{megacz}
+\bibliographystyle{alpha}
+\pagestyle{fancyplain}
+
+\definecolor{light}{gray}{0.7}
+
+\setlength{\marginparwidth}{1.2in}
+\let\oldmarginpar\marginpar
+\renewcommand\marginpar[1]{\-\oldmarginpar[\raggedleft\footnotesize #1]%
+{\raggedright\footnotesize #1}}
+
+
+\newcommand{\footnoteremember}[2]{
+ \footnote{#2}
+ \newcounter{#1}
+ \setcounter{#1}{\value{footnote}}
+} \newcommand{\footnoterecall}[1]{
+ \footnotemark[\value{#1}]
+}
+
+%\pdfpagewidth 8.5in
+%\pdfpageheight 11in
+%\topmargin 0in
+\textheight 8.2in
+%\textwidth 6.0in
+%\oddsidemargin 0.25in
+%\evensidemargin 0.25in
+%\headwidth 6.0in
+\def\to{\ $\rightarrow$\ }
+
+\def\docnum{AM33}
+
+\author{
+\normalsize{
+\begin{tabular}{c}
+\end{tabular}}
+}
+
+\title{\vspace{-1cm}AM33: The FleetTwo Dock
+\\
+{\normalsize
+Adam Megacz
+}}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+Changes:
+
+\begin{tabular}{rl}
+\color{red}
+09-Aug
+& \color{red} Removed the explicit ``decrement loop counter'' instruction \\
+& \color{red} Renamed {\tt D}-flag to {\tt Z}-flag \\
+\color{red}
+21-Jun
+& \color{red} Moved the {\tt P} (predicate) field to the MSB end of the word \\
+& \color{red} Changed to a single counter, full word width \\
+& \color{red} Included one extra Marina erratum I had forgotten \\
+& \color{red} Changed encoding of {\tt flush} to match internal encoding\\
+\color{black}
+25-May
+& Added errata for Kessels counter on Marina test chip \\
+18-May
+& Added errata for Marina test chip \\
+17-Feb
+& Clarified setting of the {\tt C}-flag\color{black}\\
+& Removed {\tt OS} bit\color{black}\\
+& Changed instruction length from 26 bits to 25\color{black}\\
+& Updated which bits are used when the {\tt Path} latch captures from the data predecessor\color{black}\\
+05-Jan
+& Fixed a one-word typo \\
+02-Jan
+& Added {\tt head} instruction \\
+& Lengthened external encoding of {\tt tail} instruction by one bit \\
+& Added {\tt abort} instruction \\
+& Removed {\tt OS} field from instructions \\
+& Renamed the {\tt Z}-flag (olc {\bf Z}ero) to the {\tt D}-flag (loop {\bf D}one)\\
+19-Dec
+& Updated diagram in section 3 to put dispatch path near MSB\\
+& Changed DP[37:25] to DP[37:27]\\
+& Added note on page 4 regarding previous\\
+14-Nov
+& Roll back ``Distinguish {\tt Z}-flag from OLC=0'' \\
+& Clarify what ``{\tt X-Extended}'' means \\
+& Change C-bit source selector from {\tt Di} to {\tt Dc} \\
+07-Nov
+& Distinguish {\tt Z}-flag from OLC=0\\
+& Add {\tt flush} instruction\\
+& Change {\tt I} bit from ``Interruptable'' to ``Immune''\\
+20-Sep
+& Update hatch description to match \href{http://fleet.cs.berkeley.edu/docs/people/ivan.e.sutherland/ies50-Requeue.State.Diagram.pdf}{IES50} \\
+28-Aug
+& Note that decision to requeue is based on value of OLC {\it before} execution\\
+& Note that decision to open the hatch is based on value of {\tt OS} bit\\
+%10-Jul
+%& Added {\tt OLC=0} predicate \\
+%& Eliminated {\tt TAPL} (made possible by previous change) \\
+%& Expanded {\tt set} {\tt Immediate} field from 13 bits to 14 bits (made possible by previous change)\\
+%09-Jul
+%& Fixed a few typos \\
+%& Added {\tt DataLatch}\to{\tt TAPL} (Amir's request) \\
+%& Eliminate ability to predicate directly on {\tt C}-flag (Ivan's request) \\
+%16-Jun
+%& When a torpedo strikes, {\tt ILC} is set to {\tt 1} \\
+%& Only {\tt move} can be torpedoed (removed {\tt I}-bit from {\tt set}/{\tt shift}) \\
+%11-Jun
+%& Changed all uses of ``Payload'' to ``Immediate'' \color{black} (not in red) \\
+%& Reworked encoding of {\tt set} instruction \\
+%\color{black}
+%06-Jun
+%& Factored in Russell Kao's comments (thanks!)\\
+%& Added mechanism for setting C-flag from fabric even on outboxes\\
+%05-Jun
+%& Made {\tt OLC} test a predicate-controlled condition\\
+%& Rewrote ``on deck'' section \\
+%& Added ``{\tt unset}'' value for {\tt ILC}\\
+%& Changed {\tt DP} to {\tt DataPredecessor} for clarity\\
+%\color{black}
+%30-Apr
+%& added comment about address-to-path ship \\
+%& changed {\tt DST} field of {\tt set} instruction from 2 bits to 3 \\
+%& changed the order of instructions in the encoding map \\
+%23-Apr
+%& added epilogue fifo to diagrams \\
+%& indicated that a token sent to the instruction port is treated as a torpedo \\
+%18-Apr
+%& replaced {\tt setInner}, {\tt setOuter}, {\tt setFlags} with unified {\tt set} instruction \\
+%& replaced {\tt literal} with {\tt shift} instruction \\
+%17-Apr
+%& Made all instructions except {\tt setOuter} depend on {\tt OLC>0} \\
+%& Removed ability to manually set the {\tt C} flag \\
+%& Expanded predicate field to three bits \\
+%& New literals scheme (via shifting) \\
+%& Instruction encoding changes made at Ivan's request (for layout purposes) \\
+%& Added summary of instruction encodings on last page \\
+%07-Apr
+%& removed ``+'' from ``potentially torpedoable'' row where it does not occur in Execute \\
+%06-Apr
+%& extended {\tt LiteralPath} to 13 bits (impl need not use all of them) \\
+%& update table 3.1.2 \\
+%& rename {\tt S} flag to {\tt C} \\
+%& noted that {\tt setFlags} can be used as {\tt nop} \\
+%29-Mar
+%& removed the {\tt L} flag (epilogues can now do this) \\
+%& removed {\tt take\{Inner|Outer\}LoopCounter} instructions \\
+%& renamed {\tt data} instruction to {\tt literal} \\
+%& renamed {\tt send} instruction to {\tt move} \\
+%23-Mar
+%& added ``if its predicate is true'' to repeat count \\
+%& added note that red wires do not contact ships \\
+%& changed name of {\tt flags} instruction to {\tt setFlags} \\
+%& removed black dot from diagrams \\
+%& changed {\tt OL} (Outer Loop participant) to {\tt OS} (One Shot) and inverted polarity \\
+%& indicated that the death of the {\tt tail} instruction is what causes the hatch to be unsealed \\
+%& indicated that only {\tt send} instructions which wait for data are torpedoable \\
+%& added section ``Torpedo Details'' \\
+%& removed {\tt torpedo} instruction \\
+%12-Mar
+%\color{black}
+%& renamed loop+repeat to outer+inner (not in red) \\
+%& renamed {\tt Z} flag to {\tt L} flag (not in red) \\
+%& rewrote ``inner and outer loops'' section \\
+%& updated all diagrams \\
+%\color{black}
+%7-Mar
+%& Moved address bits to the LSB-side of a 37-bit instruction \\
+%& Added {\it micro-instruction} and {\it composite instruction} terms \\
+%& Removed the {\tt DL} field, added {\tt decrement} mode to {\tt loop} \\
+%& Created the {\tt Hold} field \\
+%& Changed how ReLooping works \\
+%& Removed {\tt clog}, {\tt unclog}, {\tt interrupt}, and {\tt massacre} \\
+\end{tabular}
+\end{abstract}
+
+\vfill
+
+\begin{center}
+\epsfig{file=all,height=1.5in}
+\epsfig{file=overview-new,height=1.5in}
+\end{center}
+
+\pagebreak
+
+\section{Overview of Fleet}
+
+A Fleet processor is organized around a {\it switch fabric}, which is
+a packet-switched network with reliable in-order delivery. The switch
+fabric is used to carry data between different functional units,
+called {\it ships}. Each ship is connected to the switch fabric by
+one or more programmable elements known as {\it docks}.
+
+A {\it path} specifies a route through the switch fabric from a
+particular {\it source} to a particular {\it destination}. The
+combination of a path and a single word to be delivered is called a
+{\it packet}. The switch fabric carries packets from their sources to
+their destinations. Each dock has \color{red}four\color{black}\
+destinations: one each for {\it instructions}, \color{red}{\it
+ torpedoes}, {\it tokens},\color{black}\ and {\it words}. A Fleet is
+programmed by depositing instruction packets into the switch fabric
+with paths that will lead them to instruction destinations of the
+docks at which they are to execute.
+
+When a packet arrives at the instruction destination of a dock, it is
+enqueued for execution. Before the instruction executes, it may cause
+the dock to wait for a packet to arrive at the dock's data destination
+or for a value to be presented by the ship. When an instruction
+executes it may consume this data and may present a data value to the
+ship or transmit a packet.
+
+Packets sent to token and torpedo destinations carry no payload. Such
+packets consume less energy than instruction packets or word packets.
+
+
+\begin{center}
+\epsfig{file=overview-new,width=2.5in}\\
+{\it Overview of a Fleet processor; dark gray shading represents the
+ switch fabric, ships are shown in light gray, and docks are shown in blue.}
+\end{center}
+\color{black}
+
+\pagebreak
+
+\section{The FleetTwo Dock}
+
+The diagram below represents a conceptual view of the interface
+between ships and the switch fabric; actual implementation circuitry
+may differ.
+
+\begin{center}
+\epsfig{file=all,width=3.5in}\\
+{\it An ``input'' dock and ``output'' dock connected to a ship. Solid
+ blue lines carry either tokens or data words, red lines carry either
+ instructions or torpedoes, and dashed lines carry only tokens.}
+\end{center}
+
+Each dock consists of a {\it data latch}, which is as wide as a single
+machine word, and a circular {\it instruction fifo} of
+instruction-width latches. The values in the instruction fifo control
+the data latch. The dock also includes a {\it path latch}, which
+stores the path along which outgoing packets will be
+sent.
+
+Note that the instruction fifo in each dock has a destination of its
+own; this is the {\it instruction destination} mentioned in the
+previous section. A token sent to an instruction destination is
+called a {\it torpedo}; it does not enter the instruction fifo, but
+rather is held in a waiting area where it may interrupt certain
+instructions (see the section on the {\tt move} instruction for further
+details).
+
+From any source to any dock's data destination there are
+two distinct paths which differ by a single bit. This bit is known as
+the ``signal'' bit, and the routing of a packet is not affected by it;
+the signal bit is used to pass control values between docks. Note that paths
+terminating at an {\it instruction} destination need not have a signal
+bit.
+
+\color{red}
+Source-sequence guarantee. Shared across instruction/torpedo (?) and
+token/word destinations.
+\color{black}
+
+\pagebreak
+\section{Instructions}
+
+In order to cause an instruction to execute, the programmer must first
+arrange for that instruction word to arrive in the data latch of some
+output dock. For example, this might be the ``data read'' output dock
+of the memory access ship or the output of a fifo ship. Once an
+instruction has arrived at this output dock, it is {\it dispatched} by
+sending it to the {\it instruction destination} of the dock at which
+it is to execute.
+
+Each instruction is 25\color{black}\ bits long, which makes
+it possible for an instruction and a 12\color{black}-bit
+path to fit in a single word of memory. This path is the path from
+the {\it dispatching} dock to the {\it executing} dock.
+
+\vspace{0.5cm}
+
+\setlength{\bitwidth}{3.5mm}
+{\tt \footnotesize
+\begin{bytefield}{37}
+ \bitheader[b]{0,24,25,36}\\
+ \bitbox{12}{dispatch path}
+ \bitbox{25}{instruction}
+\end{bytefield}}
+\color{black}
+
+Note that the 12\color{black}-bit {\tt dispatch path}
+field is not the same width as the 13-bit {\tt Immediate} path field
+in the {\tt move} instruction, which in turn may not be the same width
+as the actual path latches in the switch fabric.
+
+The algorithm for expanding a path to a wider width is specific to the
+switch fabric implementation, and is not specified by this
+document.\footnote{For the Marina experiment, the correct
+  algorithm is to sign-extend the path; the most significant bit of
+  the given path is used to fill the vacant bit of the latch.}  In
+particular, because the {\tt dispatch path} field is always used to
+specify a path which terminates at an instruction destination (never a
+data destination), and because instruction destinations ignore the
+signal bit, certain optimizations may be possible.
+
+%\subsection{Life Cycle of an Instruction}
+%
+%The diagram below shows an input dock for purposes of illustration:
+%
+%\begin{center}
+%\epsfig{file=in,width=4in}\\
+%{\it an input dock}
+%\end{center}
+%
+%\color{black}
+%
+%\begin{center}
+%\epsfig{file=out,width=4in}\\
+%{\it an output dock}
+%\end{center}
+
+%\subsection{Format of an Instruction}
+%
+%All instruction words have the following format:
+%
+%
+%
+%The {\tt P} bits are a {\it predicate}; this holds a code which
+%indicates if the instruction should be executed or ignored depending
+%on the state of flags in the dock. Note that {\tt head} and {\tt
+%tail} instructions do not have {\tt P} fields.
+
+
+\subsection{Loop Counter}
+
+A programmer can perform two types of loops: {\it inner} loops
+consisting of only one {\tt move} instruction and {\it outer} loops of
+multiple instructions of any type. Inner loops may be nested within
+an outer loop, but no other nesting of loops is allowed.
+
+The dock has \color{red}one loop counter, called {\tt LC}. It is the
+same width as a word carried through the switch fabric (37 bits).
+
+\color{black}
+
+\subsection{Flags}
+
+The dock has four flags: {\tt A}, {\tt B},
+{\tt C}, and \color{red}{\tt Z}\color{black}.
+
+\begin{itemize}
+\item The {\tt A} and {\tt B} flags are general-purpose flags which
+ may be set and cleared by the programmer.
+
+%\item
+%
+% The {\tt L} flag, known as the {\it last} flag, is set whenever
+% the value in the outer counter ({\tt OLC}) is one,
+\color{black}
+% indicating
+% that the dock is in the midst of the last iteration of an
+% outer loop. This flag can be used to perform certain
+% operations (such as sending a completion token) only on the last
+% iteration of an outer loop.
+
+\item The {\tt C} flag is known as the {\it control} flag, and may be
+ set by the {\tt move} instruction based on information from the
+ ship or from an inbound packet. See the {\tt move} instruction
+ for further details.
+
+\item The \color{red}{\tt Z}\color{black}\ flag is known as the
+ \color{red}{\it zero}\color{black}\ flag. The \color{red}{\tt
+ Z}\color{black}\ flag is {\it set} whenever the {\tt LC} is zero.
+ In an actual implementation the \color{red}{\tt Z}\color{black}\
+  flag might not require an actual latch; it might simply be derived
+ from the ``zeroness'' of the {\tt LC}.\color{black}
+
+\end{itemize}
+
+\subsection{Predication}
+
+All instructions except for {\tt head} and {\tt tail} have a three-bit
+field marked {\tt P}, which specifies a {\it predicate}.
+
+\begin{center}
+\setlength{\bitwidth}{5mm}
+{\tt{\footnotesize{
+\begin{bytefield}{25}
+ \bitheader[b]{0,21,22,24}\\
+ \bitbox{3}{P}
+ \bitbox[tbr]{22}{}
+\color{black}
+\end{bytefield}}}}
+\end{center}
+
+The predicate determines which conditions must be true in order for
+the instruction to execute; if it is not executed, it is simply {\it
+ ignored}. The table below shows what conditions must be true in
+order for an instruction to execute:
+
+\begin{center}
+\begin{tabular}{|r|l|}\hline
+Code & Execute if \\\hline
+{\tt 000:} & {\tt Z=0}\ and {\tt A=0} \\
+{\tt 001:} & {\tt Z=0}\ and {\tt A=1} \\
+{\tt 010:} & {\tt Z=0}\ and {\tt B=0} \\
+{\tt 011:} & {\tt Z=0}\ and {\tt B=1} \\
+{\tt 100:} & Unused \\
+{\tt 101:} & {\tt Z=1}\ \\
+{\tt 110:} & {\tt Z=0}\ \\
+{\tt 111:} & always \\
+\hline\end{tabular}
+\end{center}
+
+\pagebreak
+
+\begin{wrapfigure}{r}{40mm}
+ \begin{center}
+\epsfig{file=requeue,height=1.5in}\\
+ \end{center}
+ \caption{{\it the requeue stage}}
+\end{wrapfigure}
+
+\subsection{The Requeue Stage}
+
+The requeue stage has two inputs, which will be referred to as the
+{\it enqueueing} input and the {\it recirculating} input. It has a
+single output which feeds into the instruction fifo.
+
+The requeue stage has two states: {\sc Updating} and {\sc
+ Circulating}.
+
+\subsubsection{The {\sc Updating} State}
+
+On initialization, the dock is in the {\sc Updating} state. In this
+state the requeue stage is performing three tasks:
+\begin{itemize}
+\item it is draining the
+previous loop's instructions (if any) from the fifo
+\item it is executing any ``one
+shot'' instructions which come between the previous loop's {\tt tail}
+and the next loop's {\tt head}
+\item it is loading the instructions of
+the next loop into the fifo.
+\end{itemize}
+
+In the {\sc Updating} state, the requeue stage will accept any
+instruction other than a {\tt tail} which arrives at its {\it
+ enqueueing} input, and pass this instruction to its output. Any
+instruction other than a {\tt head} which arrives at the {\it
+ recirculating} input will be discarded.
+
+Note that when a {\tt tail} instruction arrives at the {\it
+ enqueueing} input, it ``gets stuck'' there. Likewise, when a {\tt
+ head} instruction arrives at the {\it recirculating} input, it also
+``gets stuck''. When the requeue stage finds {\it both} a {\tt tail}
+instruction stuck at the {\it enqueueing} input and a {\tt head}
+instruction stuck at the {\it recirculating} input, the requeue stage
+discards both the {\tt head} and {\tt tail} and transitions to the
+{\sc Circulating} state.
+
+\subsubsection{The {\sc Circulating} State}
+
+In the {\sc Circulating} state, the dock repeatedly executes the set
+of instructions that are in the instruction fifo.
+
+In the {\sc Circulating} state, the requeue stage will not accept
+items from its {\it enqueueing} input. Any item presented at the {\it
+ recirculating} input will be passed through to the requeue stage's
+output.
+
+When an {\tt abort} instruction is executed, the requeue stage
+transitions back to the {\sc Updating} state. Note that {\tt abort}
+instructions include a predicate; an {\tt abort} instruction whose
+predicate is not met will not cause this transition.
+
+\color{black}
+
+
+\pagebreak
+\section{Instructions}
+
+%The dock supports four instructions:
+%{\tt move} (variants: {\tt moveto}, {\tt dispatch}),
+%{\tt shift},
+%{\tt set}, and
+%{\tt tail}.
+%\color{black}
+
+
+\subsection{{\tt move}}
+
+\newcommand{\bitsMove}{\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{14-21}\\
+\color{light}
+ \bitbox{3}{P}
+\color{black}
+\color{red}
+ \bitbox{1}{0}
+ \bitbox{1}{R}
+ \bitbox{1}{I}
+\color{black}
+ \bitbox{1}{\tt Ti}
+ \bitbox{1}{\tt Di}
+ \bitbox{1}{\tt Dc}
+ \bitbox{1}{\tt Do}
+ \bitbox{1}{\tt To}
+ \bitbox[l]{19}{}
+\end{bytefield}}
+
+\begin{bytefield}{25}
+ \bitheader[b]{0,12,13}\\
+ \bitbox[1]{10}{\raggedleft {\tt moveto} ({\tt Immediate\to Path})}
+ \bitbox[r]{1}{}
+ \bitbox{1}{\tt 1}
+ \bitbox{13}{\tt Immediate}
+\end{bytefield}
+
+\begin{bytefield}{25}
+ \bitheader[b]{11,12,13}\\
+ \bitbox[1]{10}{\raggedleft {\tt dispatch} ({\footnotesize {\tt DataPredecessor[37:26\color{black}]\to Path}})\ \ }
+ \bitbox[r]{1}{}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 1}
+\color{light}
+ \bitbox[trb]{12}{}
+\color{black}
+\end{bytefield}
+
+\begin{bytefield}{25}
+ \bitheader[b]{11,12,13}\\
+ \bitbox[1]{10}{\raggedleft {\tt move} ({\tt Path} unchanged):}
+ \bitbox[r]{1}{}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 0}
+\color{red}
+ \bitbox{1}{\tt 0}
+\color{light}
+ \bitbox[trb]{11}{}
+\color{black}
+\end{bytefield}
+
+}
+\bitsMove
+
+\begin{itemize}
+\item {\tt Ti} - Token Input: wait for the token predecessor to be full and drain it.
+\item {\tt Di} - Data Input: wait for the data predecessor to be full and drain it.
+\item {\tt Dc} - Data Capture: pulse the data latch.
+\item {\tt Do} - Data Output: fill the data successor.
+\item {\tt To} - Token Output: fill the token successor.
+\end{itemize}
+
+The data successor and token successor must both be empty in order for
+a {\tt move} instruction to attempt execution.
+
+\color{red}
+If the {\tt S} bit is set (not shown -- there is no space left!), the
+{\tt move} instruction will subtract one from the {\tt LC} counter
+each time it executes.
+NOTE: the flavor of {\tt set} instruction which decrements the counter
+is now unnecessary; we can simply use a ``do-nothing {\tt move}'' with
+the {\tt S}-bit set for that.
+
+If the {\tt R} bit is set, the {\tt move} instruction will execute
+repeatedly until its predicate no longer holds (or a torpedo strikes).
+An ``infinite'' or ``standing'' move can be achieved by setting the
+{\tt R} bit and clearing the {\tt S} bit.
+\color{black}
+
+\subsection*{Torpedoes}
+
+The {\tt I} bit stands for {\tt Immune}, and indicates if the
+instruction is immune to torpedoes. If a {\tt move} instruction which
+is not immune is waiting to execute and a torpedo is lying in wait,
+the torpedo {\it strikes}. \color{red}When a torpedo strikes, the
+{\tt move} instruction and the torpedo are both consumed and the {\tt
+ LC} is set to zero.\color{black}
+
+\subsection*{The C Flag}
+
+Every time the {\tt move} instruction executes, the {\tt C} flag may
+be set:
+
+\begin{itemize}
+\item At an {\it input} dock the {\tt C} flag is set to the signal bit
+ of the incoming packet.
+
+\item At an {\it output} dock the {\tt C} flag is set to a value
+ provided by the ship if the {\tt Dc} bit is set. If the {\tt
+ Dc} bit is not set, the {\tt C} flag is set to the signal bit of
+ the incoming packet.
+\end{itemize}
+\color{black}
+
+\subsection*{Flushing}
+
+The {\tt flush} instruction is a variant of {\tt move} which is valid
+only at input docks. It has the same effect as {\tt deliver}, except
+that it sets a special ``flushing'' indicator along with the data
+being delivered.
+
+\newcommand{\bitsFlush}{\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{14-18}\\
+ \bitbox[r]{6}{\raggedleft{\tt flush\ \ }}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 1}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 0}
+\color{red}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 0}
+ \bitbox{1}{\tt 1}
+\color{black}
+ \bitbox{11}{}
+\end{bytefield}}}
+\bitsFlush
+
+When a ship fires, it must examine the ``flushing'' indicators on the
+input docks whose fullness was part of the firing condition. If all
+of the input docks' flushing indicators are set, the ship must drain
+all of their data successors and take no action. If some, but not
+all, of the indicators are set, the ship must drain {\it only the data
+ successors of the docks whose indicators were {\bf not} set}, and
+take no action. If none of the flushing indicators was set, the ship
+fires normally.
+
+\color{black}
+
+\pagebreak
+
+\subsection{{\tt set}}
+
+The {\tt set} command is used to set the data latch, the flags, or the
+loop counter.
+
+\newcommand{\bitsSet}{
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{19-21}\\
+\color{light}
+ \bitbox{3}{P}
+\color{black}
+\color{red}
+ \bitbox{1}{1}
+ \bitbox{1}{0}
+ \bitbox{1}{1}
+\color{black}
+\color{light}
+ \bitbox{4}{Dest}
+ \bitbox{3}{Src}
+ \bitbox{12}{}
+\color{black}
+\end{bytefield}}
+
+\begin{bytefield}{25}
+ \bitheader[b]{0,11-18}\\
+ \bitbox[1]{5}{\raggedleft {\tt Immediate}\to{\tt LC}}
+ \bitbox[r]{1}{}
+ \bitbox{4}{\tt 1000\color{black}}
+ \bitbox{3}{\tt 100}
+ \color{red}
+ \bitbox{12}{\tt Immediate}
+ \color{black}
+\end{bytefield}
+
+\begin{bytefield}{25}
+ \bitheader[b]{12-18}\\
+ \bitbox[1]{5}{\raggedleft {\tt Data Latch}\to{\tt LC}}
+ \bitbox[r]{1}{}
+ \bitbox{4}{\tt 1000\color{black}}
+ \bitbox{3}{\tt 010}
+ \bitbox{12}{}
+\end{bytefield}
+
+\begin{bytefield}{25}
+ \bitheader[b]{0,13-18}\\
+ \bitbox[1]{5}{\raggedleft \footnotesize {\tt Sign-Extended Immediate}\to{\tt Data Latch}}
+ \bitbox[r]{1}{}
+ \bitbox{4}{\tt 0010\color{black}}
+ \bitbox{1}{\begin{minipage}{0.5cm}{
+\begin{center}
+\tt{\footnotesize{Si
+
+\vspace{-2mm}gn}}
+\end{center}}
+\end{minipage}}
+ \bitbox{14}{\tt Immediate}
+\end{bytefield}
+
+\begin{bytefield}{25}
+ \bitheader[b]{0,5,6,11,15-18}\\
+ \bitbox[1]{5}{\raggedleft {\tt Update Flags}}
+ \bitbox[r]{1}{}
+ \bitbox{4}{\tt 0001\color{black}}
+ \bitbox{3}{}
+ \bitbox{6}{\tt nextA}
+ \bitbox{6}{\tt nextB}
+\end{bytefield}
+\color{black}
+}
+\bitsSet
+
+The FleetTwo implementation is likely to have an unarchitected
+``literal latch'' at the on deck ({\tt OD}) stage, which is loaded
+with the possibly-extended literal {\it at the time that the {\tt set}
+ instruction comes on deck}. This latch is then copied into the data
+latch when a {\tt set Data Latch} instruction
+executes.
+
+The {\tt Sign-Extended Immediate} instruction copies the {\tt
+Immediate} field into the least significant bits of the data latch.
+All other bits of the data latch are filled with a copy of the
+bit marked ``{\tt Sign}.''
+\color{black}
+
+Each of the {\tt nextA} and {\tt nextB} fields has the following
+structure, and indicates which old flag values should be logically
+{\tt OR}ed together to produce the new flag value:
+
+\begin{center}
+{\tt
+\begin{bytefield}{6}
+ \bitheader[b]{0-5}\\
+ \bitbox{1}{${\text{\tt A}}$}
+ \bitbox{1}{$\overline{\text{\tt A}}$}
+ \bitbox{1}{${\text{\tt B}}$}
+ \bitbox{1}{$\overline{\text{\tt B}}$}
+ \bitbox{1}{${\text{{\tt C}\ }}$}
+ \bitbox{1}{$\overline{\text{{\tt C}\ }}$}
+\end{bytefield}}
+\end{center}
+
+Each bit corresponds to one possible input; all inputs whose bits are
+set are {\tt OR}ed together, and the resulting value is assigned to
+the flag. Note that if none of the bits are set, the value assigned
+is zero. Note also that it is possible to produce a {\tt 1} by {\tt
+ OR}ing any flag with its complement, and that {\tt set Flags} can
+be used to create a {\tt nop} (no-op) by setting each flag to itself.
+
+
+\color{black}
+
+\pagebreak
+\subsection{{\tt shift}}
+
+\newcommand{\shiftImmediateSize}{19}
+
+Each {\tt shift} instruction carries an immediate of \shiftImmediateSize\
+bits. When a {\tt shift} instruction is executed, this immediate is copied
+into the least significant \shiftImmediateSize\ bits of the data latch,
+and the remaining most significant bits of the data latch are loaded
+with the value formerly in the least significant bits of the data latch.
+In this manner, large literals can be built up by ``shifting'' them
+into the data latch \shiftImmediateSize\ bits at a time.
+
+\newcommand{\bitsShift}{
+\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{0,18-21}\\
+\color{light}
+ \bitbox{3}{P}
+\color{black}
+\color{red}
+ \bitbox{1}{1}
+ \bitbox{1}{0}
+ \bitbox{1}{0}
+\color{black}
+ \bitbox{\shiftImmediateSize}{Immediate}
+\end{bytefield}}
+}
+\bitsShift
+
+The FleetTwo implementation is likely to have an unarchitected
+``literal latch'' at the on deck ({\tt OD}) stage, which is loaded
+with the literal {\it at the time that the {\tt shift} instruction
+ comes on deck}. This latch is then copied into the data latch when
+the instruction executes.
+
+\color{black}
+
+\subsection{{\tt abort}}
+\newcommand{\bitsAbort}{\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{18-21}\\
+\color{light}
+ \bitbox{3}{P}
+\color{black}
+\color{red}
+ \bitbox{1}{1}
+ \bitbox{1}{1}
+ \bitbox{1}{0}
+\color{black}
+ \bitbox{1}{0}
+\color{light}
+ \bitbox[tbr]{18}{}
+\end{bytefield}}}
+\bitsAbort
+
+An {\tt abort} instruction causes a loop to exit; see the section on
+the Requeue Stage for further details.
+
+\subsection{{\tt head}}
+\newcommand{\bitsHead}{
+\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{18-21}\\
+\color{light}
+ \bitbox{3}{}
+\color{black}
+\color{red}
+ \bitbox{1}{1}
+ \bitbox{1}{1}
+ \bitbox{1}{1}
+\color{black}
+ \bitbox{1}{0}
+\color{light}
+ \bitbox[tbr]{18}{}
+\end{bytefield}}}
+\bitsHead
+
+A {\tt head} instruction marks the start of a loop; see the section on
+the Requeue Stage for further details.
+
+\color{black}
+\subsection{{\tt tail}}
+\newcommand{\bitsTail}{
+\setlength{\bitwidth}{5mm}
+{\tt
+\begin{bytefield}{25}
+ \bitheader[b]{18-21}\\
+\color{light}
+ \bitbox{3}{}
+\color{black}
+\color{red}
+ \bitbox{1}{1}
+ \bitbox{1}{1}
+ \bitbox{1}{1}
+\color{black}
+ \bitbox{1}{1}
+\color{light}
+ \bitbox[tbr]{18}{}
+\end{bytefield}}}
+\bitsTail
+
+A {\tt tail} instruction marks the end of a loop; see the section on
+the Requeue Stage for further details.
+
+\color{black}
+%\pagebreak
+%\subsection{{\tt takeOuterLoopCounter}}
+%
+%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{16-19,21}\\
+%\color{light}
+% \bitbox{1}{A}
+% \bitbox{1}{OS}
+% \bitbox{2}{P}
+%\color{black}
+% \bitbox{3}{000}
+% \bitbox{1}{0}
+% \bitbox{2}{11}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\color{black}
+%\end{bytefield}}
+%
+%This instruction copies the value in the outer loop counter {\tt OLC}
+%into the least significant bits of the data latch and leaves all other
+%bits of the data latch unchanged.
+%
+%\subsection{{\tt takeInnerLoopCounter}}
+%
+%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{16-19,21}\\
+%\color{light}
+% \bitbox{1}{A}
+% \bitbox{1}{OS}
+% \bitbox{2}{P}
+%\color{black}
+% \bitbox{3}{???}
+% \bitbox{1}{?}
+% \bitbox{2}{??}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\color{black}
+%\end{bytefield}}
+%
+%This instruction copies the value in the inner loop counter {\tt ILC}
+%into the least significant bits of the data latch and leaves all other
+%bits of the data latch unchanged.
+%
+%
+%
+%%\pagebreak
+%%\subsection{{\tt interrupt}}
+%%
+%%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{0,5,16-19,21}\\
+%\color{light}
+% \bitbox{4}{}
+%\color{black}
+% \bitbox{3}{000}
+% \bitbox{1}{1}
+% \bitbox{2}{00}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\end{bytefield}}
+%
+%When an {\tt interrupt} instruction reaches {\tt IH}, it will wait
+%there for the {\tt OD} stage to be full with an instruction that has
+%the {\tt IM} bit set. When this occurs, the instruction at {\tt OD}
+%{\it will not execute}, but {\it may reloop} if the conditions for
+%relooping are met.
+%\footnote{The ability to interrupt an instruction yet have it reloop is very
+%useful for processing chunks of data with a fixed size header and/or
+%footer and a variable length body.}
+%
+%
+%\subsection{{\tt massacre}}
+%
+%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{16-19,21}\\
+%\color{light}
+% \bitbox{4}{}
+%\color{black}
+% \bitbox{3}{000}
+% \bitbox{1}{1}
+% \bitbox{2}{01}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\color{black}
+%\end{bytefield}}
+%
+%When a {\tt massacre} instruction reaches {\tt IH}, it will wait there
+%for the {\tt OD} stage to be full with an instruction that has the
+%{\tt IM} bit set. When this occurs, all instructions in the
+%instruction fifo (including {\tt OD}) are retired.
+%
+%\subsection{{\tt clog}}
+%
+%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{16-19,21}\\
+%\color{light}
+% \bitbox{4}{}
+%\color{black}
+% \bitbox{3}{000}
+% \bitbox{1}{1}
+% \bitbox{2}{10}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\color{black}
+%\end{bytefield}}
+%
+%When a {\tt clog} instruction reaches {\tt OD}, it remains there and
+%no more instructions will be executed until an {\tt unclog} is
+%performed.
+%
+%\subsection{{\tt unclog}}
+%
+%\setlength{\bitwidth}{5mm}
+%{\tt
+%\begin{bytefield}{25}
+% \bitheader[b]{16-19,21}\\
+%\color{light}
+% \bitbox{4}{}
+%\color{black}
+% \bitbox{3}{000}
+% \bitbox{1}{1}
+% \bitbox[lrtb]{2}{11}
+%\color{light}
+% \bitbox[tbr]{16}{}
+%\color{black}
+%\end{bytefield}}
+%
+%When an {\tt unclog} instruction reaches {\tt IH}, it will wait there
+%until a {\tt clog} instruction is at {\tt OD}. When this occurs, both
+%instructions retire.
+%
+%Note that issuing an {\tt unclog} instruction to a dock which is not
+%clogged and whose instruction fifo contains no {\tt clog} instructions
+%will cause the dock to deadlock.
+
+\pagebreak
+\section*{Marina Errata}
+
+The following additional restrictions have been imposed on the dock in
+the Marina test chip:
+
+\subsection*{All Versions}
+
+\begin{enumerate}
+
+\item
+A Marina dock initializes with the {\tt ILC}, {\tt OLC}, and flags in
+an indeterminate state.
+
+\item
+The instruction immediately after a {\tt move} instruction must not be
+a {\tt set flags} instruction which utilizes the {\tt C}-flag (the
+value of the {\tt C}-flag is not stable for a brief time after a {\tt
+ move}).
+
+\color{red}
+
+\item
+If a {\tt move} instruction is torpedoable (i.e., it has the {\tt I} bit
+set to {\tt 0}), it {\it must} have either the {\tt Ti} bit or {\tt
+ Di} bit set (or both). It is not permitted for a torpedoable {\tt
+ move} to have both bits cleared.
+
+\color{black}
+
+\end{enumerate}
+
+
+\subsection*{Marina with Ivan's Counter}
+
+\begin{enumerate}
+
+\item
+
+A torpedoable {\tt move} instruction must not be followed immediately
+by a {\tt set olc} instruction or another torpedoable {\tt move}.
+
+\item
+
+This document specifies that when a torpedoable {\tt move} instruction
+executes successfully, the \color{red}{\tt Z}\color{black}\ flag is unchanged.  In Marina, when
+a torpedoable {\tt move} instruction executes successfully, it causes
+the \color{red}{\tt Z}\color{black}\ flag to be set if the {\tt OLC} was zero and causes it to
+be cleared if the {\tt OLC} was nonzero. Thus, in the following
+instruction sequence:
+
+ \begin{verbatim}
+ head;
+ [*] set olc=1;
+ send token to self:i;
+ [T] recv token;
+ [*] send token to self;
+ [T] recv token;
+ [*] abort;
+ tail;
+ \end{verbatim}
+
+the \color{red}{\tt Z}\color{black}\ flag will be left {\it set} on Marina, whereas a strict
+implementation of this document would leave it cleared.
+
+In practice, this distinction rarely matters.
+
+\end{enumerate}
+
+\subsection*{Marina with Kessels Counter}
+
+With the Kessels counter, the \color{red}{\tt Z}\color{black}-flag {\it is exactly equal to}
+the zeroness of the {\tt OLC}; it cannot be ``out of sync'' with it.
+
+\begin{enumerate}
+
+\item
+Every ``load OLC'' instruction must be predicated on the \color{red}{\tt Z}\color{black}-flag
+being {\it set}. This is a sneaky way of forcing the programmer to
+``run down'' the counter before loading it, because Kessels' counter
+does not support ``unloading.''
+
+\item
+Every ``decrement OLC'' instruction must be predicated on the
+\color{red}{\tt Z}\color{black}-flag being {\it cleared}.  This way we never have to check if the
+counter is already empty before decrementing.
+
+\item
+The instruction after a torpedoable {\tt move} must not be predicated
+on the \color{red}{\tt Z}\color{black}-flag being {\it set} (it may be predicated on the
+\color{red}{\tt Z}\color{black}-flag being {\it cleared}).  This is because, while the move
+instruction is waiting to execute, the \color{red}{\tt Z}\color{black}-flag will be cleared,
+and the predicate stage believes that it can skip the instruction even
+though {\tt do[ins]} is still high (I think this is dumb).
+
+
+\end{enumerate}
+
+\color{black}
+
+\pagebreak
+\section*{Instruction Encoding Map\color{black}}
+
+
+\vspace{3mm}\hspace{-1cm}{\tt move}\hspace{1cm}\vspace{-6mm}\\
+\bitsMove
+\bitsFlush
+
+\vspace{3mm}\hspace{-1cm}{\tt shift}\hspace{1cm}\vspace{-6mm}\\
+\bitsShift
+
+\vspace{3mm}\hspace{-1cm}{\tt set}\hspace{1cm}\vspace{-6mm}\\
+\bitsSet
+
+\vspace{3mm}\hspace{-1cm}{\tt abort}\hspace{1cm}\vspace{-6mm}\\
+\bitsAbort
+
+\vspace{3mm}\hspace{-1cm}{\tt head}\hspace{1cm}\vspace{-6mm}\\
+\bitsHead
+
+\vspace{3mm}\hspace{-1cm}{\tt tail}\hspace{1cm}\vspace{-6mm}\\
+\bitsTail
+
+
+%\pagebreak
+%\epsfig{file=all,height=5in,angle=90}
+
+%\pagebreak
+%\subsection*{Input Dock}
+%\epsfig{file=in,width=8in,angle=90}
+
+%\pagebreak
+%\subsection*{Output Dock}
+%\epsfig{file=out,width=8in,angle=90}
+
+
+%\pagebreak
+%\epsfig{file=ports,height=5in,angle=90}
+
+%\pagebreak
+%\epsfig{file=best,height=5in,angle=90}
+
+
+\end{document}