HaskellWiki

Haskell | Wiki community | Recent changes
Random page | Special pages

 

Not logged in
Log in | Help

Shootout/Nbody

< Shootout

Categories: Code

This is a ShootoutEntry for the N-body benchmark.

Each program should model the orbits of Jovian planets, using the same simple symplectic-integrator - see the Java program.

Correct output N = 1000 is

-0.169075164

-0.169087605

Contents

1 Benchmarks

Finally, we get some decent performance. Timing on Linux/P4, N=1,000,000


Author Time Flags
current 8.561 -O2 -optc-O3 -optc-ffast-math
current-fexcess-precision 6.335 -O2 -fexcess-precision -optc-O3 -optc-ffast-math
chris 4.515 -O2
chris 2.547 -O2 -fexcess-precision
chris 2.363 -O2 -fexcess-precision -optc-O -optc-ffast-math
C 1.174
chris+dons 1.112 -O2 -fexcess-precision -optc-O -optc-ffast-math
stuarray 1.073 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
stuarray2 0.912 -O3 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
C 0.518 -O2

Again with N=5,000,000 (as used in shootout):

Author Time Flags
current 42.685 -O2 -optc-O3 -optc-ffast-math
chris 11.753 -O2 -fexcess-precision -optc-O -optc-ffast-math
C 5.786
chris+dons 5.498 -O2 -fexcess-precision -optc-O -optc-ffast-math
stuarray 5.290 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
stuarray2 4.489 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
C 2.555 -O2

Hooray!

Need to add my smaller/faster tweak to Joel's entry. -- ChrisKuklewicz

On x86/Linux gcc produces weird numerical errors with the new IOUArray entries (chris and chris+dons), when using the -optc-Ox flags. It cannot be used above -optc-O1

chris ERROR -O2 -optc-O3 -optc-ffast-math
chris ERROR -O2 -optc-O2 -fexcess-precision -optc-ffast-math

1.1 Architecture differences

This is very CPU architecture dependent, which means a powerbook G4 is not seeing the same results as linux/x86.

Compiling both with -fglasgow-exts -fexcess-precision -O3 -optc-O3 -optc-ffast-math on a G4:

   $ time ./chris 5000000; time ./dons+chris 5000000
   -0.169075164
   -0.169083134
   real    0m27.794s
   user    0m21.176s
   sys     0m0.268s
   -0.169075164
   -0.169083134
   real    0m25.985s
   user    0m22.357s
   sys     0m0.236s

On linux/x86 {{{dons+chris}}} beats {{{chris}}} by a factor of two and on a G4 {{{dons+chris}}} is a few percent slower. Not too surprising, since the floating point hardware differs. The performanace for the this benchmark (AMD Sempron, Inten P4) is untunable on a G4.

So Don: you can submit this when you are happy with the tweaking since I am blind to x86 performance here. Can you try passing architecture switches to -optc such as -mtune? -- ChrisKuklewicz

2 Conservative extension

Replace hardcoded sizeof double with Storable instance sizeOf.

{-# OPTIONS -fvia-C -fexcess-precision #-}
--
-- The Computer Language Benchmarks Game
-- http://shootout.alioth.debian.org/
--
-- Contributed by Olof Kraigher and Don Stewart.
--
-- To be compiled with:
--
--  -O2 -fglasgow-exts -funbox-strict-fields -fbang-patterns -optc-O
--
-- Don't enable -optc-mfpmath=sse -optc-msse2, this triggers a gcc bug on x86
--
-- TODO, rewrite in ST.
--
 
import Foreign
import Foreign.Storable
import Foreign.Marshal.Alloc
import Data.IORef
import Control.Monad
import System
import Text.Printf
 
main = do
    n <- getArgs >>= readIO.head
    initialize
    offset_momentum
    energy 0 planets >>= printf "%.9f\n"
    replicateM_ n (advance planets)
    energy 0 planets >>= printf "%.9f\n"
 
offset_momentum = do
    m <- foldr (.+.) (Vec 0 0 0)
             `fmap` (mapM momentum
                   . take (nbodies - 1)
                   . iterate next $ next planets)
 
    setVec (vel planets) $ (-1/solar_mass) *. m
  where
    momentum p = liftM2 (*.) (mass p) (getVec (vel p))
 
energy :: Double -> Ptr Double -> IO Double
energy !e p
    | p == end = return e
    | otherwise      = do
        p1 <- getVec (pos p)
        v1 <- getVec (vel p)
        m1 <- mass p
        e  <- energy2 p1 m1 e p2
        energy (e + 0.5 * m1 * magnitude2 v1) p2
    where p2 = next p
 
energy2 :: Vector3 -> Double -> Double -> Ptr Double -> IO Double
energy2 p1 m1 !e p
    | p  == end = return e
    | otherwise = do
        p2 <- getVec (pos p)
        v2 <- getVec (vel p)
        m2 <- mass p
        let distance = sqrt . magnitude2 $ p1 .-. p2
        energy2 p1 m1 (e - m1 * m2 / distance) (next p)
 
advance :: Ptr Double -> IO ()
advance p1 = when (p1 /= end) $ do
    pos1 <- getVec (pos p1)
    m1   <- mass p1
    go pos1 m1 p2
    advance  p2
 where
    vel1 = vel p1
    p2   = next p1
 
    go pos1 m1 p2
         | p2 /= end = do
                pos2 <- getVec (pos p2)
                m2   <- mass p2
                let vel2       = vel p2
                    difference = pos1 .-. pos2
                    distance2  = magnitude2 difference
                    distance   = sqrt distance2
                    magnitude  = delta_t / (distance2 * distance)
                    mass_magn  = magnitude *. difference
                vel1 -= m2 *. mass_magn
                vel2 += m1 *. mass_magn
                go pos1 m1 (next p2)
 
          | otherwise = do
                v1 <- getVec vel1
                p1 += delta_t *. v1
 
------------------------------------------------------------------------
 
planets :: Ptr Double
planets = unsafePerformIO $ mallocBytes (7 * nbodies * sizeOf (undefined :: Double))
 
nbodies :: Int
nbodies = 5
 
solar_mass, delta_t, days_per_year :: Double
days_per_year = 365.24
delta_t       = 0.01
solar_mass    = 4 * pi ** 2
 
initialize = mapM_ newPlanet planets
  where
   dp = days_per_year
   planets =
    [0, 0, 0,
     0, 0, 0,
     1 * solar_mass,
     4.84143144246472090e+00,        (-1.16032004402742839e+00), (-1.03622044471123109e-01),
     1.66007664274403694e-03*dp,     7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp,
     9.54791938424326609e-04 * solar_mass,
 
     8.34336671824457987e+00,        4.12479856412430479e+00,    (-4.03523417114321381e-01),
     (-2.76742510726862411e-03)*dp,  4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp,
     2.85885980666130812e-04 * solar_mass,
 
     1.28943695621391310e+01,        (-1.51111514016986312e+01), (-2.23307578892655734e-01),
     2.96460137564761618e-03*dp,     2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp,
     4.36624404335156298e-05 * solar_mass,
 
     1.53796971148509165e+01,        (-2.59193146099879641e+01), 1.79258772950371181e-01,
     2.68067772490389322e-03*dp,     1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp,
     5.15138902046611451e-05 * solar_mass
    ]
 
------------------------------------------------------------------------
-- Support for 3 dimensional mutable vectors
 
cursor :: IORef (Ptr Double)
cursor = unsafePerformIO $ newIORef planets
 
data Vector3 = Vec !Double !Double !Double
 
end :: Ptr Double
end = inc planets $! nbodies * 7
 
next  :: Ptr Double -> Ptr Double
next p = inc p 7
 
inc :: Ptr Double -> Int -> Ptr Double
inc ptr n = plusPtr ptr (n * sizeOf (undefined :: Double))
 
newPlanet :: Double -> IO ()
newPlanet d = do
    ptr <- readIORef cursor
    pokeElemOff ptr 0 d
    writeIORef cursor $! inc ptr 1
 
pos :: Ptr Double -> Ptr Double
pos ptr = ptr
 
vel :: Ptr Double -> Ptr Double
vel ptr = inc ptr 3
 
mass :: Ptr Double -> IO Double
mass ptr = peekElemOff ptr 6
 
------------------------------------------------------------------------
 
(Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w)
 
(Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w)
 
k *. (Vec x y z) = Vec (k*x) (k*y) (k*z) -- allocates
 
magnitude2 (Vec x y z) = x*x + y*y + z*z
 
------------------------------------------------------------------------
 
getVec p = do
    x <- peek p
    y <- peekElemOff p 1
    z <- peekElemOff p 2
    return $! Vec x y z
 
setVec p (Vec x y z)= do
    poke        p   x
    pokeElemOff p 1 y
    pokeElemOff p 2 z
 
infix 4 +=
infix 4 -=
 
v1 += (Vec u v w) = do
    x <- peek v1;          poke        v1   (x+u)
    y <- peekElemOff v1 1; pokeElemOff v1 1 (y+v)
    z <- peekElemOff v1 2; pokeElemOff v1 2 (z+w)
 
v1 -= (Vec u v w)  = do
    x <- peek v1;          poke        v1   (x-u)
    y <- peekElemOff v1 1; pokeElemOff v1 1 (y-v)
    z <- peekElemOff v1 2; pokeElemOff v1 2 (z-w)


3 Current entry

Improves on the one below, shorter and at tad faster. I removed the unnecessary inc function and it runs a bit faster. // Olof

{-# OPTIONS -fexcess-precision #-}
-- 
-- The Computer Language Shootout
-- http://shootout.alioth.debian.org/
--        
-- Contributed by Olof Kraigher, with help from Don Stewart.
--     
-- Compile with:
--
--  -funbox-strict-fields -fglasgow-exts -fbang-patterns -O3
--      -optc-O3 -optc-mfpmath=sse -optc-msse2 -optc-march=pentium4 
-- 
 
import Foreign
import Foreign.Storable
import Foreign.Marshal.Alloc
import Data.IORef
import Control.Monad
import System
import Text.Printf
 
main = do
    n <- getArgs >>= readIO.head
    initialize
    offset_momentum
    energy 0 planets >>= printf "%.9f\n"
    replicateM_ n (advance planets)
    energy 0 planets >>= printf "%.9f\n"
 
offset_momentum = do
    m <- foldr (.+.) (Vec 0 0 0)
             `fmap` (mapM momentum
                   . take (nbodies - 1)
                   . iterate next $ next planets)
 
    setVec (vel planets) $ (-1/solar_mass) *. m
  where
    momentum !p = liftM2 (*.) (mass p) (getVec (vel p))
 
energy :: Double -> Ptr Double -> IO Double
energy !e !p
    | p == end = return e
    | otherwise      = do
        p1 <- getVec (pos p)
        v1 <- getVec (vel p)
        m1 <- mass p
        e  <- energy2 p1 m1 e p2
        energy (e + 0.5 * m1 * magnitude2 v1) p2
    where p2 = next p
 
energy2 !p1 !m1 !e !p
    | p  == end = return e
    | otherwise = do
        p2 <- getVec (pos p)
        v2 <- getVec (vel p)
        m2 <- mass p
        let distance = sqrt . magnitude2 $ p1 .-. p2
        energy2 p1 m1 (e - m1 * m2 / distance) (next p)
 
advance :: Ptr Double -> IO ()
advance !p1 = when (p1 /= end) $ do
    pos1 <- getVec $ pos p1
    m1   <- mass p1
    let go !p2
            | p2 /= end = do
                pos2 <- getVec (pos p2)
                m2   <- mass p2
                let vel2       = vel p2
                    difference = pos1 .-. pos2
                    distance2  = magnitude2 difference
                    distance   = sqrt distance2
                    magnitude  = delta_t / (distance2 * distance)
                    mass_magn  = magnitude *. difference
                vel1 -= m2 *. mass_magn
                vel2 += m1 *. mass_magn
                go (next p2)
 
            | otherwise = do
                v1 <- getVec vel1
                p1 += delta_t *. v1
    go p2
    advance  p2
  where
    vel1 = vel p1
    p2   = next p1
 
------------------------------------------------------------------------
 
planets :: Ptr Double
planets = unsafePerformIO $ mallocBytes (7 * nbodies * 8) -- sizeOf double = 8 
 
nbodies :: Int
nbodies = 5
 
solar_mass, delta_t, days_per_year :: Double
days_per_year = 365.24
solar_mass    = 4 * pi ** 2;
delta_t       = 0.01
 
initialize = mapM_ newPlanet planets
  where
   dp = days_per_year
   planets =
    [0, 0, 0,
     0, 0, 0,
     1 * solar_mass,
     4.84143144246472090e+00,        (-1.16032004402742839e+00), (-1.03622044471123109e-01),
     1.66007664274403694e-03*dp,     7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp,
     9.54791938424326609e-04 * solar_mass,
 
     8.34336671824457987e+00,        4.12479856412430479e+00,    (-4.03523417114321381e-01),
     (-2.76742510726862411e-03)*dp,  4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp,
     2.85885980666130812e-04 * solar_mass,
 
     1.28943695621391310e+01,        (-1.51111514016986312e+01), (-2.23307578892655734e-01),
     2.96460137564761618e-03*dp,     2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp,
     4.36624404335156298e-05 * solar_mass,
 
     1.53796971148509165e+01,        (-2.59193146099879641e+01), 1.79258772950371181e-01,
     2.68067772490389322e-03*dp,     1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp,
     5.15138902046611451e-05 * solar_mass
    ]
 
------------------------------------------------------------------------
-- Support for 3 dimensional mutable vectors
 
data Vector3 = Vec !Double !Double !Double
 
end :: Ptr Double
end = plusPtr planets $ nbodies * 7 * 8
 
next  :: Ptr Double -> Ptr Double
next p = plusPtr p 56
 
cursor :: IORef (Ptr Double)
cursor = unsafePerformIO $ newIORef planets
 
newPlanet :: Double -> IO ()
newPlanet !d = do
    ptr <- readIORef cursor
    pokeElemOff ptr 0 d
    writeIORef cursor (plusPtr ptr 8)
 
pos :: Ptr Double -> Ptr Double
pos ptr = ptr
 
vel :: Ptr Double -> Ptr Double
vel ptr = plusPtr ptr 24
 
mass :: Ptr Double -> IO Double
mass ptr = peekElemOff ptr 6
 
------------------------------------------------------------------------
 
(Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w)
 
(Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w)
 
k *. (Vec x y z) = Vec (k*x) (k*y) (k*z) -- allocates
 
magnitude2 (Vec x y z) = x*x + y*y + z*z
 
------------------------------------------------------------------------
 
getVec !p = liftM3 Vec (peek p) (f 1) (f 2)
    where f = peekElemOff p
 
setVec p (Vec x y z)= do
    poke        p   x
    pokeElemOff p 1 y
    pokeElemOff p 2 z
 
infix 4 +=
infix 4 -=
 
v1 += (Vec u v w) = do
    x <- peek v1;          poke        v1   (x+u)
    y <- peekElemOff v1 1; pokeElemOff v1 1 (y+v)
    z <- peekElemOff v1 2; pokeElemOff v1 2 (z+w)
 
v1 -= (Vec u v w)  = do
    x <- peek v1;          poke        v1   (x-u)
    y <- peekElemOff v1 1; pokeElemOff v1 1 (y-v)
    z <- peekElemOff v1 2; pokeElemOff v1 2 (z-w)

4 Proposed entry

A fast and relatively nice entry that runs 1.5x the fastest c++ entry.

{-# OPTIONS_GHC -fbang-patterns -fexcess-precision#-}
 
{-
    The Computer Language Shootout
       http://shootout.alioth.debian.org/
       
    Contributed by Olof Kraigher, with help from Don Stewart.
    
    Compile with:
        -fglasgow-exts -fbang-patterns -fexcess-precision -O3 -optc-O2 -optc-mfpmath=sse -optc-msse2 -optc-march=pentium4
-}
 
import Foreign.Ptr; import Foreign.Storable;    import Foreign.Marshal.Alloc;   import Foreign(unsafePerformIO);
import Data.IORef;  import Control.Monad;       import System;                  import Text.Printf;
 
data Vector3 = Vec !Double !Double !Double
 
nr_of_planets :: Int
nr_of_planets = 5
 
delta_t :: Double
delta_t = 0.01
 
days_per_year :: Double
days_per_year = 365.24
 
solar_mass :: Double
solar_mass = 4 * pi ** 2;
 
planets :: Ptr Double
planets = unsafePerformIO $ mallocBytes (7 * nr_of_planets * 8) -- sizeOf double = 8 
 
end :: Ptr Double
end = inc planets $ nr_of_planets * 7
 
cursor :: IORef (Ptr Double)
cursor = unsafePerformIO $ newIORef planets
 
{-# INLINE inc #-}
inc :: Ptr Double -> Int -> Ptr Double
inc ptr n = plusPtr ptr $ n * 8
 
{-# INLINE new #-}
new :: Double -> IO ()
new !d = do
    ptr <- readIORef cursor
    pokeElemOff ptr 0 d
    writeIORef cursor (inc ptr 1)
 
{-# INLINE getVec #-}
getVec :: Ptr Double -> IO Vector3
getVec ptr = 
    let p = peekElemOff ptr in
    liftM3 Vec (p 0) (p 1) (p 2)
 
{-# INLINE setVec #-}
setVec :: Ptr Double -> Vector3 -> IO ()
setVec ptr (Vec x y z)= do
    pokeElemOff ptr 0 x
    pokeElemOff ptr 1 y
    pokeElemOff ptr 2 z
 
{-# INLINE pos #-}    
pos :: Ptr Double -> Ptr Double
pos ptr = ptr
 
{-# INLINE vel #-}
vel :: Ptr Double -> Ptr Double
vel ptr = inc ptr 3
 
{-# INLINE mass #-}
mass :: Ptr Double -> IO Double
mass ptr = peekElemOff ptr 6
 
{-# INLINE (.+.) #-}
(.+.) :: Vector3 -> Vector3 -> Vector3
(Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w)
 
{-# INLINE (.-.) #-}
(.-.) :: Vector3 -> Vector3 -> Vector3
(Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w)
 
{-# INLINE (*.) #-}
(*.) :: Double -> Vector3 -> Vector3
k *. (Vec x y z) = Vec (k*x) (k*y) (k*z)
 
{-# INLINE magnitude2 #-}
magnitude2 :: Vector3 -> Double
magnitude2 (Vec x y z) = x*x + y*y + z*z
 
infix 4 +=
infix 4 -=
 
{-# INLINE (+=) #-}
(+=) :: Ptr Double -> Vector3 -> IO ()
v1 += v2 = do 
    v1' <- getVec v1
    setVec v1 $ v1' .+. v2
 
{-# INLINE (-=) #-}
(-=) :: Ptr Double -> Vector3 -> IO ()
v1 -= v2 = do 
    v1' <- getVec v1
    setVec v1 $ v1' .-. v2
 
initialize :: IO ()
initialize =
    let dp = days_per_year in
    mapM_ new
        [   0, 0, 0,
            0, 0, 0,
            1 * solar_mass, 
            4.84143144246472090e+00,        (-1.16032004402742839e+00), (-1.03622044471123109e-01),
            1.66007664274403694e-03*dp,     7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp,
            9.54791938424326609e-04 * solar_mass,
        
            8.34336671824457987e+00,        4.12479856412430479e+00,    (-4.03523417114321381e-01),
            (-2.76742510726862411e-03)*dp,  4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp,
            2.85885980666130812e-04 * solar_mass,
        
            1.28943695621391310e+01,        (-1.51111514016986312e+01), (-2.23307578892655734e-01),
            2.96460137564761618e-03*dp,     2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp,
            4.36624404335156298e-05 * solar_mass,
        
            1.53796971148509165e+01,        (-2.59193146099879641e+01), 1.79258772950371181e-01,
            2.68067772490389322e-03*dp,     1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp,
            5.15138902046611451e-05 * solar_mass
        ]
 
 
advance :: IO ()
advance = do
    let advance' !planet1
            | planet1 /= end = do
                p1 <- getVec $ pos planet1
                m1  <- mass planet1
                let vel1 = vel planet1
                
                let advance'' !planet2
                        | planet2 /= end = do
                            p2 <- getVec $ pos planet2
                            m2 <- mass planet2
                            let vel2 = vel planet2
                            let difference = p1 .-. p2
                            let distance2 = magnitude2 difference
                            let distance = sqrt distance2
                            let magnitude = delta_t / (distance2 * distance)
                            let mass_magn = magnitude *. difference
                            vel1 -= (m2 *. mass_magn)
                            vel2 += (m1 *. mass_magn)
                            advance'' (inc planet2 7)
                            
                        | otherwise = do
                            v1 <- getVec vel1
                            planet1 += delta_t *. v1
                
                let planet1' = (inc planet1 7)
                advance'' planet1'
                advance' planet1'
                
            | otherwise = return ()
                
    advance' planets
 
energy :: IO Double
energy = do
    e <- newIORef 0
    
    let energy' !planet1
            | planet1 /= end = do
                p1 <- getVec $ pos planet1
                v1 <- getVec $ vel planet1
                m1 <- mass planet1
    
                let energy'' !planet2
                        | planet2 /= end = do
                            p2 <- getVec $ pos planet2
                            v2 <- getVec $ vel planet2
                            m2 <- mass planet2
                            let distance = sqrt $ magnitude2 $ p1 .-. p2
                            modifyIORef e (\x -> x - m1 * m2 / distance)
                            energy'' (inc planet2 7)
 
                        | otherwise = do
                            modifyIORef e (+ 0.5 * m1 * (magnitude2 v1))
                
                let planet1' = (inc planet1 7)
                energy'' planet1'
                energy' planet1'
    
            | otherwise = return ()
                
    energy' planets
    readIORef e
 
offset_momentum :: IO ()
offset_momentum = do
    let momentum' !planet = do
        v <- getVec $ vel planet
        m <- mass planet
        return $ m *. v
    
    momentum <- return.(foldl (.+.) (Vec 0 0 0)) =<< 
        (mapM momentum' $ take (nr_of_planets - 1) $ iterate (\x -> inc x 7) (inc planets 7))
 
    setVec (vel planets) $ (-1/solar_mass) *. momentum
 
main = do
    n <- getArgs >>= readIO.head
    initialize
    offset_momentum
    energy >>= printf "%.9f\n"
    replicateM_ n advance
    energy >>= printf "%.9f\n"

5 Proposed entry

Complete rewrite, attention paid down to the asm level on the inner loops. Very much faster.

Note that in GHC -fexcess-precision is ignored on the command line!

Submitted

{-# OPTIONS -fexcess-precision #-}
--
-- The Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Don Stewart
--
-- Compile with:
--  -O -fglasgow-exts -fbang-patterns -optc-O3 -optc-march=pentium4
--
import System
import System.IO.Unsafe
import Text.Printf
import Foreign.Marshal.Array
import Foreign
import Control.Monad
import Bits
 
import GHC.Exts
 
------------------------------------------------------------------------
 
main = do
    n <- getArgs >>= readIO . head
    offset_momentum
    printf "%.9f\n" =<< energy
    replicateM_ n advance
    printf "%.9f\n" =<< energy
 
------------------------------------------------------------------------
 
type Bodies = Ptr Double
 
nbodies = 5
 
bodies :: Bodies
bodies = unsafePerformIO . newArray . concat $
   [planet 1 0 0 0 0 0 0                -- sun
   ,planet 9.54791938424326609e-04      -- jupiter
        4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
      ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
   ,planet 2.85885980666130812e-04      -- saturn
        8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
      (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
   ,planet 4.36624404335156298e-05      -- uranus
        1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
      ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
   ,planet 5.15138902046611451e-05      -- neptune
        1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
      ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
  where
    planet m x y z vx vy vz =
        [m*solar_mass,x, y, z, vx*days_per_year, vy*days_per_year, vz*days_per_year, 0]
    -- n.b widht of 8 doubles to make bitwise math nice
 
-- field access
(W# body) `at` (W# field) = I#
    (word2Int# (field `or#` (body `uncheckedShiftL#` 3#))) -- sizeof Double
 
put !body !field  = pokeElemOff bodies (body `at` field)
get !body !field  = peekElemOff bodies (body `at` field)
 
--
-- field offsets in flattened array
--
mass = 0
x    = 1
y    = 2
z    = 3
vx   = 4
vy   = 5
vz   = 6
solar_mass    = 4 * pi * pi
days_per_year = 365.24
 
------------------------------------------------------------------------
-- Offset momentum
 
offset_momentum :: IO ()
offset_momentum = do
    (px,py,pz) <- go 0 0 0 0
    put 0 vx (- px / solar_mass)
    put 0 vy (- py / solar_mass)
    put 0 vz (- pz / solar_mass)
  where
    go !i !px !py !pz
        | i >= nbodies = return (px,py,pz)
        | otherwise    = do
            imass <- look mass
            ivx   <- look vx
            ivy   <- look vy
            ivz   <- look vz
            go (i+1) (px + ivx * imass)
                     (py + ivy * imass)
                     (pz + ivz * imass)
 
        where look = get i
 
------------------------------------------------------------------------
-- Energy
 
energy = goI 0 0
 
goI :: Word -> Double -> IO Double
goI !i !e
    | i >= nbodies = return e
    | otherwise    = do
       im   <- look mass
       ix   <- look x      -- cache the body offset calculation?
       iy   <- look y
       iz   <- look z
       ivx  <- look vx
       ivy  <- look vy
       ivz  <- look vz
       e'   <- goJ ix iy iz im (i+1) (e + 0.5 * im * (ivx * ivx + ivy * ivy + ivz * ivz))
       goI (i+1) e'
    where
       look = get i
 
goJ !ix !iy !iz !im !j !e
    | j >= nbodies = return e
    | otherwise    = do
 
        b2m  <- look mass
        b2x  <- look x
        b2y  <- look y
        b2z  <- look z
        let dx   = ix - b2x
            dy   = iy - b2y
            dz   = iz - b2z
            distance = sqrt $! dx * dx + dy