+ x86 architecture
+ -------------------------------------------------------------------------- */
+
+#ifdef i386_HOST_ARCH
+
+StgThreadReturnCode
+StgRun(StgFunPtr f, StgRegTable *basereg) {
+ /*
+ * StgRun (i386): save the C callee-saves registers, load `basereg`
+ * into %ebx, and jump to `f`. Control comes back at the global label
+ * STG_RETURN, where the value in %esi is copied to %eax, the saved
+ * registers are restored, and %eax is returned to the caller as `r`.
+ *
+ * `space` is never touched from C; it only makes the compiler reserve
+ * RESERVED_C_STACK_BYTES plus four pointer-sized slots in this frame.
+ * The asm addresses that area relative to %esp.
+ *
+ * NOTE(review): only %edx is declared clobbered and there is no
+ * "memory" clobber; whether that is sufficient depends on what the
+ * code reached through `f` modifies, which is not visible here --
+ * confirm.
+ */
+ unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
+ StgThreadReturnCode r;
+ /*
+ * Operand map for the asm below:
+ * %0 = r (output, in %eax) %1 = space (memory output)
+ * %2 = f (memory input) %3 = basereg (memory input)
+ * %4 = RESERVED_C_STACK_BYTES (immediate)
+ */
+ __asm__ volatile (
+ /*
+ * save callee-saves registers on behalf of the STG code.
+ *
+ * %eax = %esp + RESERVED_C_STACK_BYTES is the base of a 4-word save
+ * area holding %ebx, %esi, %edi, %ebp at offsets 0, 4, 8, 12.
+ * NOTE(review): this presumes `space` starts at %esp, so that the
+ * save area is the last four words of `space`; the asm never takes
+ * the address of `space` itself -- confirm against the frame layout
+ * the compiler actually generates.
+ */
+ "movl %%esp, %%eax\n\t"
+ "addl %4, %%eax\n\t"
+ "movl %%ebx,0(%%eax)\n\t"
+ "movl %%esi,4(%%eax)\n\t"
+ "movl %%edi,8(%%eax)\n\t"
+ "movl %%ebp,12(%%eax)\n\t"
+ /*
+ * Set BaseReg: load the `basereg` argument into %ebx (its old value
+ * was saved above).
+ */
+ "movl %3,%%ebx\n\t"
+ /*
+ * grab the function argument from the stack, and jump to it.
+ * (%eax is free again here: the save-area base is no longer needed.)
+ */
+ "movl %2,%%eax\n\t"
+ "jmp *%%eax\n\t"
+ /*
+ * Export the STG_RETURN label. The jmp above is unconditional, so
+ * nothing in this asm falls through to the label; control reaches it
+ * only by a jump from elsewhere (presumably STG code returning to C
+ * -- TODO confirm).
+ */
+ ".global " STG_RETURN "\n"
+ STG_RETURN ":\n\t"
+
+ "movl %%esi, %%eax\n\t" /* Return value in R1 */
+
+ /*
+ * restore callee-saves registers. (Don't stomp on %%eax!)
+ *
+ * The save-area base is recomputed in %edx (declared clobbered
+ * below) because %eax now carries the return value. This relies on
+ * %esp having the same value here as it had on entry to the asm.
+ */
+ "movl %%esp, %%edx\n\t"
+ "addl %4, %%edx\n\t"
+ "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
+ "movl 4(%%edx),%%esi\n\t"
+ "movl 8(%%edx),%%edi\n\t"
+ "movl 12(%%edx),%%ebp\n\t"
+
+ : "=&a" (r), "=m" (space) /* outputs: r in %eax (early-clobber); space is written */
+ : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES) /* inputs: %2, %3 from memory; %4 immediate */
+ : "edx" /* stomps on %edx */
+ );
+
+ return r;
+}
+
+#endif
+
+/* ----------------------------------------------------------------------------
+ x86-64 is almost the same as plain x86.
+
+ I've done it using entirely inline assembler, because I couldn't
+ get gcc to generate the correct subtraction from %rsp by using
+ the local array variable trick. It didn't seem to reserve
+ enough space. Oh well, it's not much harder this way.
+
+ ------------------------------------------------------------------------- */
+
+#ifdef x86_64_HOST_ARCH
+
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+/*
+ * On x86-64 StgRun is not a C function body. The symbol declared above
+ * is defined by the ".globl StgRun" / "StgRun:" directives inside the
+ * asm statement below, and this static function only serves as a
+ * container for that asm. Callers enter at the StgRun label, so any
+ * prologue the compiler emits for the container is not part of StgRun.
+ *
+ * At the StgRun label the code reads `f` from %rdi and `basereg` from
+ * %rsi. It allocates a frame of %0 = RESERVED_C_STACK_BYTES+48+8 bytes
+ * and stores %rbx, %rbp, %r12-%r15 in the 48 bytes starting at
+ * %rsp + %0 - 48. It then copies %rsi into %rbx and jumps to `f`.
+ *
+ * At the STG_RETURN label, %r13 is copied to %rax as the return value,
+ * the six registers are reloaded, the frame is released, and retq
+ * returns to StgRun's caller.
+ *
+ * NOTE(review): this function is static and is not referenced anywhere
+ * in the visible source; whether the compiler is guaranteed to keep it
+ * (and with it the StgRun definition) cannot be determined from here --
+ * confirm.
+ */
+static void StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ /*
+ * Define the global entry point StgRun, allocate the frame (%0
+ * bytes), then save callee-saves registers on behalf of the STG code
+ * at %rsp + %0 - 48.
+ */
+ ".globl StgRun\n"
+ "StgRun:\n\t"
+ "subq %0, %%rsp\n\t"
+ "movq %%rsp, %%rax\n\t"
+ "addq %0-48, %%rax\n\t"
+ "movq %%rbx,0(%%rax)\n\t"
+ "movq %%rbp,8(%%rax)\n\t"
+ "movq %%r12,16(%%rax)\n\t"
+ "movq %%r13,24(%%rax)\n\t"
+ "movq %%r14,32(%%rax)\n\t"
+ "movq %%r15,40(%%rax)\n\t"
+ /*
+ * Set BaseReg: copy the second argument (%rsi, basereg) into %rbx.
+ */
+ "movq %%rsi,%%rbx\n\t"
+ /*
+ * The function argument f is taken from %rdi (it is not read from
+ * the stack here); jump to it via %rax.
+ */
+ "movq %%rdi,%%rax\n\t"
+ "jmp *%%rax\n\t"
+ /*
+ * Export the STG_RETURN label. The jmp above is unconditional, so
+ * control reaches this point only by a jump to the label from
+ * elsewhere (presumably STG code returning to C -- TODO confirm).
+ */
+ ".global " STG_RETURN "\n"
+ STG_RETURN ":\n\t"
+
+ "movq %%r13, %%rax\n\t" /* Return value in R1 */
+
+ /*
+ * restore callee-saves registers. (Don't stomp on %%rax!)
+ *
+ * The save-area address is recomputed in %rdx; this relies on %rsp
+ * having the same value here as it had after the subq at StgRun.
+ * The frame is then released and retq returns to StgRun's caller.
+ */
+ "movq %%rsp, %%rdx\n\t"
+ "addq %0-48, %%rdx\n\t"
+ "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
+ "movq 8(%%rdx),%%rbp\n\t"
+ "movq 16(%%rdx),%%r12\n\t"
+ "movq 24(%%rdx),%%r13\n\t"
+ "movq 32(%%rdx),%%r14\n\t"
+ "movq 40(%%rdx),%%r15\n\t"
+ "addq %0, %%rsp\n\t"
+ "retq"
+
+ : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
+ /*
+ HACK alert!
+
+ The x86_64 ABI specifies that on a procedure call, %rsp is
+ aligned on a 16-byte boundary + 8. That is, the first
+ argument on the stack after the return address will be
+ 16-byte aligned.
+
+ Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
+ of 16 bytes.
+
+ BUT... when we do a C-call from STG land, gcc likes to put the
+ stack alignment adjustment in the prolog. eg. if we're calling
+ a function with arguments in regs, gcc will insert 'subq $8,%rsp'
+ in the prolog, to keep %rsp aligned (the return address is 8
+ bytes, remember). The mangler throws away the prolog, so we
+ lose the stack alignment.
+
+ The hack is to add this extra 8 bytes to our %rsp adjustment
+ here, so that throughout STG code, %rsp is 16-byte aligned,
+ ready for a C-call.
+
+ A quick way to see if this is wrong is to compile this code:
+
+ main = System.Exit.exitWith ExitSuccess
+
+ And run it with +RTS -sstderr. The stats code in the RTS, in
+ particular statsPrintf(), relies on the stack alignment because
+ it saves the %xmm regs on the stack, so it'll fall over if the
+ stack isn't aligned, and calling exitWith from Haskell invokes
+ shutdownHaskellAndExit using a C call.
+
+ Future gcc releases will almost certainly break this hack...
+ */
+}
+
+#endif /* x86-64 */
+
+/* -----------------------------------------------------------------------------
+ Sparc architecture
+
+ --
+ OLD COMMENT from GHC-3.02:
+
+ We want tailjumps to be calls, because `call xxx' is the only Sparc
+ branch that allows an arbitrary label as a target. (Gcc's ``goto
+ *target'' construct ends up loading the label into a register and
+ then jumping, at the cost of two extra instructions for the 32-bit
+ load.)
+
+ When entering the threaded world, we stash our return address in a
+ known location so that \tr{%i7} is available as an extra
+ callee-saves register. Of course, we have to restore this when
+ coming out of the threaded world.
+
+ I hate this god-forsaken architecture. Since the top of the
+ reserved stack space is used for globals and the bottom is reserved
+ for outgoing arguments, we have to stick our return address
+ somewhere in the middle. Currently, I'm allowing 100 extra
+ outgoing arguments beyond the first 6. --JSM
+
+ Updated info (GHC 4.06): we don't appear to use %i7 any more, so
+ I'm not sure whether we still need to save it. Incidentally, what
+ does the last paragraph above mean when it says "the top of the
+ stack is used for globals"? What globals? --SDM
+
+ Updated info (GHC 4.08.2): not saving %i7 any more (see below).