[commit: ghc] ghc-7.2: Keep the C stack pointer 16-byte aligned on all x86 platforms, not just Mac OS X (#5250). (cb55596)
Ian Lynagh
igloo at earth.li
Tue Jul 5 17:26:58 CEST 2011
Repository : ssh://darcs.haskell.org//srv/darcs/ghc
On branch : ghc-7.2
http://hackage.haskell.org/trac/ghc/changeset/cb555964127c8c6c66c8ee07a482374505ec29b9
>---------------------------------------------------------------
commit cb555964127c8c6c66c8ee07a482374505ec29b9
Author: Simon Marlow <marlowsd at gmail.com>
Date: Fri Jun 17 23:18:04 2011 +0100
Keep the C stack pointer 16-byte aligned on all x86 platforms, not just Mac OS X (#5250).
The OS X ABI requires the C stack pointer to be 16-byte aligned at a
function call. As far as I know this is not a requirement of other
x86 ABIs, but it seems that gcc is now generating SSE2 code that
assumes stack alignment (-mincoming-stack-boundary defaults to 4,
i.e. 2^4 = 16 bytes), so we have to respect 16-byte alignment.
>---------------------------------------------------------------
compiler/nativeGen/X86/CodeGen.hs | 15 +++-------
rts/StgCRun.c | 54 ++++++++++++++++++++-----------------
2 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index a667c51..d191733 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1620,10 +1620,10 @@ genCCall target dest_regs args =
let
sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
raw_arg_size = sum sizes
- tot_arg_size = if isDarwin then roundTo 16 raw_arg_size else raw_arg_size
+ tot_arg_size = roundTo 16 raw_arg_size
arg_pad_size = tot_arg_size - raw_arg_size
delta0 <- getDeltaNat
- when isDarwin $ setDeltaNat (delta0 - arg_pad_size)
+ setDeltaNat (delta0 - arg_pad_size)
use_sse2 <- sse2Enabled
push_codes <- mapM (push_arg use_sse2) (reverse args)
@@ -1646,7 +1646,7 @@ genCCall target dest_regs args =
++ "probably because too many return values."
let push_code
- | isDarwin && (arg_pad_size /= 0)
+ | arg_pad_size /= 0
= toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
DELTA (delta0 - arg_pad_size)]
`appOL` concatOL push_codes
@@ -1657,10 +1657,9 @@ genCCall target dest_regs args =
-- but not for stdcall (callee does it)
--
-- We have to pop any stack padding we added
- -- on Darwin even if we are doing stdcall, though (#5052)
+ -- even if we are doing stdcall, though (#5052)
pop_size | cconv /= StdCallConv = tot_arg_size
- | isDarwin = arg_pad_size
- | otherwise = 0
+ | otherwise = arg_pad_size
call = callinsns `appOL`
toOL (
@@ -1703,10 +1702,6 @@ genCCall target dest_regs args =
assign_code dest_regs)
where
- isDarwin = case platformOS (targetPlatform dflags) of
- OSDarwin -> True
- _ -> False
-
arg_size :: CmmType -> Int -- Width in bytes
arg_size ty = widthInBytes (typeWidth ty)
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index e28353c..a5a4e93 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -128,18 +128,29 @@ StgFunPtr StgReturn(void)
#define STG_GLOBAL ".global "
#endif
-StgRegTable *
-StgRun(StgFunPtr f, StgRegTable *basereg) {
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ STG_GLOBAL STG_RUN "\n"
+ STG_RUN ":\n\t"
- unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
- StgRegTable * r;
+ /*
+ * move %esp down to reserve an area for temporary storage
+ * during the execution of STG code.
+ *
+ * The stack pointer has to be aligned to a multiple of 16
+ * bytes from here - this is a requirement of the C ABI, so
+ * that C code can assign SSE2 registers directly to/from
+ * stack locations.
+ */
+ "subl %0, %%esp\n\t"
- __asm__ volatile (
/*
* save callee-saves registers on behalf of the STG code.
*/
- "movl %%esp, %%eax\n\t"
- "addl %4, %%eax\n\t"
+ "movl %%esp, %%eax\n\t"
+ "addl %0-16, %%eax\n\t"
"movl %%ebx,0(%%eax)\n\t"
"movl %%esi,4(%%eax)\n\t"
"movl %%edi,8(%%eax)\n\t"
@@ -147,25 +158,17 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
/*
* Set BaseReg
*/
- "movl %3,%%ebx\n\t"
+ "movl 24(%%eax),%%ebx\n\t"
/*
* grab the function argument from the stack
*/
- "movl %2,%%eax\n\t"
-
- /*
- * Darwin note:
- * The stack pointer has to be aligned to a multiple of 16 bytes at
- * this point. This works out correctly with gcc 4.0.1, but it might
- * break at any time in the future. TODO: Make this future-proof.
- */
-
- /*
+ "movl 20(%%eax),%%eax\n\t"
+ /*
* jump to it
*/
"jmp *%%eax\n\t"
- STG_GLOBAL STG_RETURN "\n"
+ STG_GLOBAL STG_RETURN "\n"
STG_RETURN ":\n\t"
"movl %%esi, %%eax\n\t" /* Return value in R1 */
@@ -174,18 +177,19 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
* restore callee-saves registers. (Don't stomp on %%eax!)
*/
"movl %%esp, %%edx\n\t"
- "addl %4, %%edx\n\t"
+ "addl %0, %%edx\n\t"
"movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
"movl 4(%%edx),%%esi\n\t"
"movl 8(%%edx),%%edi\n\t"
"movl 12(%%edx),%%ebp\n\t"
- : "=&a" (r), "=m" (space)
- : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
- : "edx" /* stomps on %edx */
- );
+ "addl %0, %%esp\n\t"
+ "ret"
- return r;
+ : : "i" (RESERVED_C_STACK_BYTES + 16 + 12)
+ // + 16 to make room for the 4 registers we have to save
+ // + 12 because we need to align %esp to a 16-byte boundary (#5250)
+ );
}
#endif
More information about the Cvs-ghc mailing list