Implement SSE2 floating-point support in the x86 native code generator (#594)
[ghc-hetmet.git] / docs / users_guide / using.xml
index af7950c..329c31f 100644 (file)
@@ -1667,6 +1667,26 @@ f "2"    = 2
 
        <varlistentry>
          <term>
+            <option>-fno-float-in</option>
+            <indexterm><primary><option>-fno-float-in</option></primary></indexterm>
+          </term>
+         <listitem>
+           <para>Turns off the float-in transformation.</para>
+         </listitem>
+       </varlistentry>
+
+       <varlistentry>
+         <term>
+            <option>-fno-specialise</option>
+            <indexterm><primary><option>-fno-specialise</option></primary></indexterm>
+          </term>
+         <listitem>
+           <para>Turns off the automatic specialisation of overloaded functions.</para>
+         </listitem>
+       </varlistentry>
+
+       <varlistentry>
+         <term>
             <option>-fspec-constr</option>
             <indexterm><primary><option>-fspec-constr</option></primary></indexterm>
           </term>
@@ -1951,6 +1971,10 @@ f "2"    = 2
 
             <para>There is no means (currently) by which this value
              may vary after the program has started.</para>
+
+            <para>The current value of the <option>-N</option> option
+              is available to the Haskell program
+              via <literal>GHC.Conc.numCapabilities</literal>.</para>
          </listitem>
        </varlistentry>
       </variablelist>
@@ -1960,6 +1984,17 @@ f "2"    = 2
 
       <variablelist>
        <varlistentry>
+         <term><option>-qa</option>
+          <indexterm><primary><option>-qa</option></primary><secondary>RTS
+          option</secondary></indexterm></term>
+         <listitem>
+            <para>Use the OS's affinity facilities to try to pin OS
+              threads to CPU cores.  This is an experimental feature,
+              and may or may not be useful.  Please let us know
+              whether it helps you!</para>
+          </listitem>
+        </varlistentry>
+       <varlistentry>
          <term><option>-qm</option></term>
           <indexterm><primary><option>-qm</option></primary><secondary>RTS
           option</secondary></indexterm>
@@ -1967,9 +2002,16 @@ f "2"    = 2
             <para>Disable automatic migration for load balancing.
             Normally the runtime will automatically try to schedule
             threads across the available CPUs to make use of idle
-            CPUs; this option disables that behaviour.  It is probably
-            only of use if you are explicitly scheduling threads onto
-            CPUs with <literal>GHC.Conc.forkOnIO</literal>.</para>
+            CPUs; this option disables that behaviour.  Note that
+              migration only applies to threads; sparks created
+              by <literal>par</literal> are load-balanced separately
+              by work-stealing.</para>
+
+            <para>
+              This option is probably only of use for concurrent
+              programs that explicitly schedule threads onto CPUs
+              with <literal>GHC.Conc.forkOnIO</literal>.
+            </para>
           </listitem>
         </varlistentry>
        <varlistentry>
@@ -2032,9 +2074,27 @@ f "2"    = 2
     <variablelist>
 
       <varlistentry>
+       <term><option>-msse2</option>:</term>
+       <listitem>
+          <para>
+            (x86 only, added in GHC 6.14.1) Use the SSE2 registers and
+            instruction set to implement floating-point operations
+            when using the native code generator.  This gives a
+            substantial performance improvement for floating point,
+            but the resulting compiled code will only run on
+            processors that support SSE2 (Intel Pentium 4 and later,
+            or AMD Athlon 64 and later).
+          </para>
+          <para>
+            SSE2 is unconditionally used on x86-64 platforms.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
        <term><option>-monly-[32]-regs</option>:</term>
        <listitem>
-         <para>(iX86 machines)<indexterm><primary>-monly-N-regs
+         <para>(x86 only)<indexterm><primary>-monly-N-regs
           option (iX86 only)</primary></indexterm> GHC tries to
           &ldquo;steal&rdquo; four registers from GCC, for performance
           reasons; it almost always works.  However, when GCC is