Personal tools

Shootout/Nbody

From HaskellWiki

< Shootout(Difference between revisions)
Jump to: navigation, search
(Fastest entry)
(update nbody)
Line 97: Line 97:
   
 
== Proposed entry ==
 
== Proposed entry ==
  +
  +
Complete rewrite, attention paid down to the asm level on the inner
  +
loops. Very much faster.
  +
  +
Note that in GHC -fexcess-precision is ignored on the command line!
  +
  +
[https://alioth.debian.org/tracker/index.php?func=detail&aid=304452&group_id=30402&atid=411646 Submitted]
  +
  +
<haskell>
  +
{-# OPTIONS -fexcess-precision #-}
  +
--
  +
-- The Computer Language Shootout
  +
-- http://shootout.alioth.debian.org/
  +
--
  +
-- Contributed by Don Stewart
  +
--
  +
-- Compile with:
  +
-- -O -fglasgow-exts -fbang-patterns -optc-O3 -optc-march=pentium4
  +
--
  +
import System
  +
import System.IO.Unsafe
  +
import Text.Printf
  +
import Foreign.Marshal.Array
  +
import Foreign
  +
import Control.Monad
  +
import Bits
  +
  +
import GHC.Exts
  +
  +
------------------------------------------------------------------------
  +
  +
main = do
  +
n <- getArgs >>= readIO . head
  +
offset_momentum
  +
printf "%.9f\n" =<< energy
  +
replicateM_ n advance
  +
printf "%.9f\n" =<< energy
  +
  +
------------------------------------------------------------------------
  +
  +
type Bodies = Ptr Double
  +
  +
nbodies = 5
  +
  +
bodies :: Bodies
  +
bodies = unsafePerformIO . newArray . concat $
  +
[planet 1 0 0 0 0 0 0 -- sun
  +
,planet 9.54791938424326609e-04 -- jupiter
  +
4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01)
  +
( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
  +
,planet 2.85885980666130812e-04 -- saturn
  +
8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01)
  +
(-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
  +
,planet 4.36624404335156298e-05 -- uranus
  +
1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01)
  +
( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
  +
,planet 5.15138902046611451e-05 -- neptune
  +
1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01
  +
( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
  +
where
  +
planet m x y z vx vy vz =
  +
[m*solar_mass,x, y, z, vx*days_per_year, vy*days_per_year, vz*days_per_year, 0]
  +
-- n.b widht of 8 doubles to make bitwise math nice
  +
  +
-- field access
  +
(W# body) `at` (W# field) = I#
  +
(word2Int# (field `or#` (body `uncheckedShiftL#` 3#))) -- sizeof Double
  +
  +
put !body !field = pokeElemOff bodies (body `at` field)
  +
get !body !field = peekElemOff bodies (body `at` field)
  +
  +
--
  +
-- field offsets in flattened array
  +
--
  +
mass = 0
  +
x = 1
  +
y = 2
  +
z = 3
  +
vx = 4
  +
vy = 5
  +
vz = 6
  +
solar_mass = 4 * pi * pi
  +
days_per_year = 365.24
  +
  +
------------------------------------------------------------------------
  +
-- Offset momentum
  +
  +
offset_momentum :: IO ()
  +
offset_momentum = do
  +
(px,py,pz) <- go 0 0 0 0
  +
put 0 vx (- px / solar_mass)
  +
put 0 vy (- py / solar_mass)
  +
put 0 vz (- pz / solar_mass)
  +
where
  +
go !i !px !py !pz
  +
| i >= nbodies = return (px,py,pz)
  +
| otherwise = do
  +
imass <- look mass
  +
ivx <- look vx
  +
ivy <- look vy
  +
ivz <- look vz
  +
go (i+1) (px + ivx * imass)
  +
(py + ivy * imass)
  +
(pz + ivz * imass)
  +
  +
where look = get i
  +
  +
------------------------------------------------------------------------
  +
-- Energy
  +
  +
energy = goI 0 0
  +
  +
goI :: Word -> Double -> IO Double
  +
goI !i !e
  +
| i >= nbodies = return e
  +
| otherwise = do
  +
im <- look mass
  +
ix <- look x -- cache the body offset calculation?
  +
iy <- look y
  +
iz <- look z
  +
ivx <- look vx
  +
ivy <- look vy
  +
ivz <- look vz
  +
e' <- goJ ix iy iz im (i+1) (e + 0.5 * im * (ivx * ivx + ivy * ivy + ivz * ivz))
  +
goI (i+1) e'
  +
where
  +
look = get i
  +
  +
goJ !ix !iy !iz !im !j !e
  +
| j >= nbodies = return e
  +
| otherwise = do
  +
  +
b2m <- look mass
  +
b2x <- look x
  +
b2y <- look y
  +
b2z <- look z
  +
let dx = ix - b2x
  +
dy = iy - b2y
  +
dz = iz - b2z
  +
distance = sqrt $! dx * dx + dy * dy + dz * dz
  +
goJ ix iy iz im (j+1) (e - ((im * b2m) / distance))
  +
where
  +
look = get j
  +
  +
------------------------------------------------------------------------
  +
-- Advance
  +
  +
advance :: IO ()
  +
advance = advanceI 0 >> update
  +
  +
advanceI i@(W# ii) = when (i < nbodies) $ do
  +
bm <- lookI mass
  +
bx <- lookI x -- cache the body offset calculation!
  +
by <- lookI y
  +
bz <- lookI z
  +
bvx <- lookI vx -- float these out to an accumultor
  +
bvy <- lookI vy
  +
bvz <- lookI vz
  +
advanceJ off bm bx by bz bvx bvy bvz (i+1)
  +
advanceI (i+1)
  +
where
  +
off = W# (ii `uncheckedShiftL#` 3#)
  +
lookI x = peekElemOff bodies (fromIntegral $ x .|. off)
  +
setI = put i
  +
  +
advanceJ !i_off !bm !bx !by !bz !bvx !bvy !bvz !j@(W# jj)
  +
| j >= nbodies = do
  +
let setI x = pokeElemOff bodies (fromIntegral $ x .|. i_off)
  +
setI vx bvx
  +
setI vy bvy
  +
setI vz bvz
  +
  +
| otherwise = do
  +
  +
b2mass <- lookJ mass
  +
b2x <- lookJ x
  +
b2y <- lookJ y
  +
b2z <- lookJ z
  +
b2vx <- lookJ vx
  +
b2vy <- lookJ vy
  +
b2vz <- lookJ vz
  +
  +
let dx = bx - b2x
  +
dy = by - b2y
  +
dz = bz - b2z
  +
distance = sqrt (dx * dx + dy * dy + dz * dz)
  +
mag = 0.01 / (distance * distance * distance)
  +
massmag = bm * mag
  +
  +
setJ vx (b2vx + dx * massmag)
  +
setJ vy (b2vy + dy * massmag)
  +
setJ vz (b2vz + dz * massmag)
  +
  +
let mass2mag = - b2mass * mag
  +
  +
advanceJ i_off bm bx by bz (bvx + dx * mass2mag)
  +
(bvy + dy * mass2mag)
  +
(bvz + dz * mass2mag) (j+1)
  +
where
  +
off = W# (jj `uncheckedShiftL#` 3#)
  +
lookJ x = peekElemOff bodies (fromIntegral $ x .|. off)
  +
setJ x = pokeElemOff bodies (fromIntegral $ x .|. off)
  +
  +
update = forM_ [0..nbodies-1] $ \i -> do
  +
let look = get i
  +
set = put i
  +
bx <- look x
  +
by <- look y
  +
bz <- look z
  +
bvx <- look vx
  +
bvy <- look vy
  +
bvz <- look vz
  +
set x (bx + 0.01 * bvx)
  +
set y (by + 0.01 * bvy)
  +
set z (bz + 0.01 * bvz)
  +
</haskell>
  +
  +
== old entry ==
   
 
Modifies the ghc flags.
 
Modifies the ghc flags.

Revision as of 04:51, 10 February 2007

This is a ShootoutEntry for the N-body benchmark.

Each program should model the orbits of Jovian planets, using the same simple symplectic-integrator - see the Java program.

Correct output N = 1000 is

-0.169075164

-0.169087605

Contents

1 Benchmarks

Finally, we get some decent performance. Timing on Linux/P4, N=1,000,000


Author Time Flags
current 8.561 -O2 -optc-O3 -optc-ffast-math
current-fexcess-precision 6.335 -O2 -fexcess-precision -optc-O3 -optc-ffast-math
chris 4.515 -O2
chris 2.547 -O2 -fexcess-precision
chris 2.363 -O2 -fexcess-precision -optc-O -optc-ffast-math
C 1.174
chris+dons 1.112 -O2 -fexcess-precision -optc-O -optc-ffast-math
stuarray 1.073 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
stuarray2 0.912 -O3 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
C 0.518 -O2

Again with N=5,000,000 (as used in shootout):

Author Time Flags
current 42.685 -O2 -optc-O3 -optc-ffast-math
chris 11.753 -O2 -fexcess-precision -optc-O -optc-ffast-math
C 5.786
chris+dons 5.498 -O2 -fexcess-precision -optc-O -optc-ffast-math
stuarray 5.290 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
stuarray2 4.489 -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields
C 2.555 -O2

Hooray!

Need to add my smaller/faster tweak to Joel's entry. -- ChrisKuklewicz

On x86/Linux gcc produces weird numerical errors with the new IOUArray entries (chris and chris+dons), when using the -optc-Ox flags. It cannot be used above -optc-O1

chris ERROR -O2 -optc-O3 -optc-ffast-math
chris ERROR -O2 -optc-O2 -fexcess-precision -optc-ffast-math

1.1 Architecture differences

This is very CPU architecture dependent, which means a powerbook G4 is not seeing the same results as linux/x86.

Compiling both with -fglasgow-exts -fexcess-precision -O3 -optc-O3 -optc-ffast-math on a G4:

   $ time ./chris 5000000; time ./dons+chris 5000000
   -0.169075164
   -0.169083134
   real    0m27.794s
   user    0m21.176s
   sys     0m0.268s
   -0.169075164
   -0.169083134
   real    0m25.985s
   user    0m22.357s
   sys     0m0.236s

On linux/x86 {{{dons+chris}}} beats {{{chris}}} by a factor of two and on a G4 {{{dons+chris}}} is a few percent slower. Not too surprising, since the floating point hardware differs. The performanace for the this benchmark (AMD Sempron, Inten P4) is untunable on a G4.

So Don: you can submit this when you are happy with the tweaking since I am blind to x86 performance here. Can you try passing architecture switches to -optc such as -mtune? -- ChrisKuklewicz

2 Proposed entry

Complete rewrite, attention paid down to the asm level on the inner loops. Very much faster.

Note that in GHC -fexcess-precision is ignored on the command line!

Submitted

{-# OPTIONS -fexcess-precision #-}
--
-- The Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Don Stewart
--
-- Compile with:
--  -O -fglasgow-exts -fbang-patterns -optc-O3 -optc-march=pentium4
--
import System
import System.IO.Unsafe
import Text.Printf
import Foreign.Marshal.Array
import Foreign
import Control.Monad
import Bits
 
import GHC.Exts
 
------------------------------------------------------------------------
 
main = do
    n <- getArgs >>= readIO . head
    offset_momentum
    printf "%.9f\n" =<< energy
    replicateM_ n advance
    printf "%.9f\n" =<< energy
 
------------------------------------------------------------------------
 
type Bodies = Ptr Double
 
nbodies = 5
 
bodies :: Bodies
bodies = unsafePerformIO . newArray . concat $
   [planet 1 0 0 0 0 0 0                -- sun
   ,planet 9.54791938424326609e-04      -- jupiter
        4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
      ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
   ,planet 2.85885980666130812e-04      -- saturn
        8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
      (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
   ,planet 4.36624404335156298e-05      -- uranus
        1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
      ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
   ,planet 5.15138902046611451e-05      -- neptune
        1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
      ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
  where
    planet m x y z vx vy vz =
        [m*solar_mass,x, y, z, vx*days_per_year, vy*days_per_year, vz*days_per_year, 0]
    -- n.b widht of 8 doubles to make bitwise math nice
 
-- field access
(W# body) `at` (W# field) = I#
    (word2Int# (field `or#` (body `uncheckedShiftL#` 3#))) -- sizeof Double
 
put !body !field  = pokeElemOff bodies (body `at` field)
get !body !field  = peekElemOff bodies (body `at` field)
 
--
-- field offsets in flattened array
--
mass = 0
x    = 1
y    = 2
z    = 3
vx   = 4
vy   = 5
vz   = 6
solar_mass    = 4 * pi * pi
days_per_year = 365.24
 
------------------------------------------------------------------------
-- Offset momentum
 
offset_momentum :: IO ()
offset_momentum = do
    (px,py,pz) <- go 0 0 0 0
    put 0 vx (- px / solar_mass)
    put 0 vy (- py / solar_mass)
    put 0 vz (- pz / solar_mass)
  where
    go !i !px !py !pz
        | i >= nbodies = return (px,py,pz)
        | otherwise    = do
            imass <- look mass
            ivx   <- look vx
            ivy   <- look vy
            ivz   <- look vz
            go (i+1) (px + ivx * imass)
                     (py + ivy * imass)
                     (pz + ivz * imass)
 
        where look = get i
 
------------------------------------------------------------------------
-- Energy
 
energy = goI 0 0
 
goI :: Word -> Double -> IO Double
goI !i !e
    | i >= nbodies = return e
    | otherwise    = do
       im   <- look mass
       ix   <- look x      -- cache the body offset calculation?
       iy   <- look y
       iz   <- look z
       ivx  <- look vx
       ivy  <- look vy
       ivz  <- look vz
       e'   <- goJ ix iy iz im (i+1) (e + 0.5 * im * (ivx * ivx + ivy * ivy + ivz * ivz))
       goI (i+1) e'
    where
       look = get i
 
goJ !ix !iy !iz !im !j !e
    | j >= nbodies = return e
    | otherwise    = do
 
        b2m  <- look mass
        b2x  <- look x
        b2y  <- look y
        b2z  <- look z
        let dx   = ix - b2x
            dy   = iy - b2y
            dz   = iz - b2z
            distance = sqrt $! dx * dx + dy * dy + dz * dz
        goJ ix iy iz im (j+1) (e - ((im * b2m) / distance))
    where
        look = get j
 
------------------------------------------------------------------------
-- Advance
 
advance :: IO ()
advance = advanceI 0 >> update
 
advanceI i@(W# ii) = when (i < nbodies) $ do
    bm  <- lookI mass
    bx  <- lookI x      -- cache the body offset calculation!
    by  <- lookI y
    bz  <- lookI z
    bvx <- lookI vx              -- float these out to an accumultor
    bvy <- lookI vy
    bvz <- lookI vz
    advanceJ off bm bx by bz bvx bvy bvz (i+1)
    advanceI (i+1)
  where
    off   = W# (ii `uncheckedShiftL#` 3#)
    lookI x = peekElemOff bodies (fromIntegral $ x .|. off)
    setI  = put i
 
advanceJ !i_off !bm !bx !by !bz !bvx !bvy !bvz !j@(W# jj)
     | j >= nbodies = do
        let setI x = pokeElemOff bodies (fromIntegral $ x .|. i_off)
        setI vx bvx
        setI vy bvy
        setI vz bvz
 
     | otherwise    = do
 
        b2mass <- lookJ mass
        b2x    <- lookJ x
        b2y    <- lookJ y
        b2z    <- lookJ z
        b2vx   <- lookJ vx
        b2vy   <- lookJ vy
        b2vz   <- lookJ vz
 
        let dx = bx - b2x
            dy = by - b2y
            dz = bz - b2z
            distance = sqrt (dx * dx + dy * dy + dz * dz)
            mag      = 0.01 / (distance * distance * distance)
            massmag  = bm     * mag
 
        setJ vx (b2vx + dx * massmag)
        setJ vy (b2vy + dy * massmag)
        setJ vz (b2vz + dz * massmag)
 
        let mass2mag = - b2mass * mag
 
        advanceJ i_off bm bx by bz (bvx + dx * mass2mag)
                               (bvy + dy * mass2mag)
                               (bvz + dz * mass2mag) (j+1)
   where
    off   = W# (jj `uncheckedShiftL#` 3#)
    lookJ x = peekElemOff bodies (fromIntegral $ x .|. off)
    setJ  x = pokeElemOff bodies (fromIntegral $ x .|. off)
 
update = forM_ [0..nbodies-1] $ \i -> do
    let look  = get i
        set   = put i
    bx  <- look x
    by  <- look y
    bz  <- look z
    bvx <- look vx
    bvy <- look vy
    bvz <- look vz
    set x (bx + 0.01 * bvx)
    set y (by + 0.01 * bvy)
    set z (bz + 0.01 * bvz)

3 old entry

Modifies the ghc flags. Submitted.

{-# OPTIONS -O -fglasgow-exts -fbang-patterns -funbox-strict-fields -fexcess-precision -optc-O -optc-march=pentium4 #-}
--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Written in Haskell by Chris Kuklewicz, further tweaks by Don Stewart
--
-- -O2 -optc-O -fglasgow-exts -fexcess-precision -optc-ffast-math
--
-- -optc-O3 cannot be used, as at least one version of gcc miscompiles this program
--
 
import System
import System.IO.Unsafe
import Monad
import Bits
import List
import Data.Array.IO
import Data.Array.Base  (unsafeRead,unsafeWrite)
import Text.Printf
import GHC.Base
import GHC.Int
 
default (Int)
 
main = do n <- getArgs >>= readIO . head
          offsetMomentum
          energy >>= printf "%.9f\n"
          advance n
          energy >>= printf "%.9f\n"
 
-- Offsets for each field
x = 0; y = 1; z = 2; vx= 3; vy= 4; vz= 5; m = 6
 
type Bodies = IOUArray Int Double
 
b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData)
{-# NOINLINE b #-}
 
set = unsafeWrite b
{-# INLINE set #-}
 
resetB = mapM_ (uncurry set) (zip [0..] bodiesData)
 
-- sun jupiter saturn uranus neptune
-- sun starts at center at rest
bodiesData =  concat [ mkB 1 0 0 0 0 0 0
   ,mkB 9.54791938424326609e-04
        4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
      ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
   ,mkB 2.85885980666130812e-04
        8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
      (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
   ,mkB 4.36624404335156298e-05
        1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
      ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
   ,mkB 5.15138902046611451e-05
        1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
      ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
 
mkB m x y z vx vy vz =
    [x, y, z, vx*days_per_year,vy*days_per_year, vz*days_per_year, m*solar_mass, 0]
 
solar_mass    = 4 * pi * pi
days_per_year = 365.24
nbodies       = 4 -- that is 0 to 4
 
mass :: Int -> IO Double
mass i = unsafeRead b (massOffset .|. (shiftl i 3)) where massOffset = 6
 
-- Give the sun a small velocity so the total momentum of all bodies totals to zero
offsetMomentum :: IO ()
offsetMomentum = do sm <- mass 0
                    let act i = mass i >>= \m -> addScaled 3 (-m/sm) (3 .|. (shiftl i 3))
                    mapM_ act [1..nbodies]
 
-- Total all kineticE and potentialE
energy = loop 0 0
  where loop !i !e
            | i > nbodies = return e
            | otherwise   = do ke <- kineticE i
                               (loop' (i+1) i $! (e+ke)) >>= loop (i+1)
        loop' !j !i !e
            | j > nbodies = return e
            | otherwise   = do pe <- potentialE i j
                               loop' (j+1) i $! (e + pe)
 
kineticE !i = do
    m <- mass i
    vx <- unsafeRead b (f vx)
    vy <- unsafeRead b (f vy)
    vz <- unsafeRead b (f vz)
    return $! 0.5 * m * (vx*vx + vy*vy + vz*vz)
  where f = (.|. (shiftl i 3))
 
potentialE !i !j = do
    m1 <- mass i
    m2 <- mass j
    dx <- liftM2 (-) (unsafeRead b (f x)) (unsafeRead b (g x))
    dy <- liftM2 (-) (unsafeRead b (f y)) (unsafeRead b (g y))
    dz <- liftM2 (-) (unsafeRead b (f z)) (unsafeRead b (g z))
    return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz))
  where
    f = (.|.) (shiftl j 3)
    g = (.|.) (shiftl i 3)
 
addScaled !i !a !j = do
    set i1 =<< liftM2 scale (unsafeRead b i1) (unsafeRead b j1)
    set i2 =<< liftM2 scale (unsafeRead b i2) (unsafeRead b j2)
    set i3 =<< liftM2 scale (unsafeRead b i3) (unsafeRead b j3)
  where scale old new = old + a * new
        i1 = i; i2 = succ i1; i3 = succ i2;
        j1 = j; j2 = succ j1; j3 = succ j2;
 
addScaled3 !i !a !jx !jy !jz = do
    set i1 =<< liftM (scale jx) (unsafeRead b i1)
    set i2 =<< liftM (scale jy) (unsafeRead b i2)
    set i3 =<< liftM (scale jz) (unsafeRead b i3)
  where scale new old = a * new + old
        i1 = i; i2 = succ i1; i3 = succ i2;
 
-- This is the main code. Essentially all the time is spent here
advance :: Int -> IO ()
advance n = when (n > 0) $ updateVel 0 >> advance (pred n)
 
  where updateVel i = when (i <= nbodies) $ do
            im  <- unsafeRead b (f m)
            ix  <- unsafeRead b (f x)
            iy  <- unsafeRead b (f y)
            iz  <- unsafeRead b (f z)
            ivx <- unsafeRead b (f vx)
            ivy <- unsafeRead b (f vy)
            ivz <- unsafeRead b (f vz)
 
            let updateVel' :: Double -> Double -> Double -> Int -> IO ()
                updateVel' !ivx !ivy !ivz !j =
                  if j > nbodies then do
                    unsafeWrite b (f vx) ivx
                    unsafeWrite b (f vy) ivy
                    unsafeWrite b (f vz) ivz
                  else do
                    let g = (.|. shiftl j 3)
                    jm <- unsafeRead b (g m)
                    dx <- liftM (ix-) (unsafeRead b (g x))
                    dy <- liftM (iy-) (unsafeRead b (g y))
                    dz <- liftM (iz-) (unsafeRead b (g z))
                    let distance = sqrt (dx*dx+dy*dy+dz*dz)
                        mag = 0.01 / (distance * distance * distance)
                    addScaled3 (3 .|. (shiftl j 3)) ( im*mag) dx dy dz
                    let a = -jm*mag
                        ivx' = ivx+a*dx
                        ivy' = ivy+a*dy
                        ivz' = ivz+a*dz
                    updateVel' ivx' ivy' ivz' $! (j+1)
 
 
            updateVel' ivx ivy ivz $! (i+1)
            addScaled (shiftl i 3) 0.01 (3 .|. (shiftl i 3))
            updateVel (i+1)
 
          where
            f = (.|. shift i 3)
 
shiftl = shiftL -- (I# i) (I# j) = I# (word2Int# (uncheckedShiftL# (int2Word# i) j))

4 Old entry

On a powerbook G4 (GHC 6.4.1, gcc 4.0.1) this is 1.7x time faster than dons+chris. 12.5 seconds versus 21.2 seconds for N=5,000,000 --ChrisKuklewicz Unboxing the strict fields gives a 3x speedup.

--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Written by Joel Koerwer.
-- Uses 7 STUArrays for the state.
-- Edited for length by ChrisKuklewicz
--
-- -O3 -optc-O3 -fexcess-precision -funbox-strict-fields -optc-ffast-math
--
-- -funbox-strict-fields is critical
--
-- -optc-ffast-math doesn't speed things up for me.
-- Well, neither does -optc-O3, but that may just be here.
--
import Control.Monad    (liftM2,liftM3,liftM4)
import Control.Monad.ST (ST, runST)
import Data.Array.ST    (STUArray, newListArray)
import Data.Array.Base  (unsafeRead, unsafeWrite)
import System           (getArgs)
import Text.Printf      (printf)
 
(!) = unsafeRead :: STUArray s Int Double -> Int -> ST s Double 
writeArray = unsafeWrite :: STUArray s Int Double -> Int -> Double -> ST s () 
size :: Int = 5
dt :: Double = 0.01
 
data PhaseSpace s = PS {ms,rxs,rys,rzs,vxs,vys,vzs :: !(STUArray s Int Double)}
 
main :: IO ()
main = do n <- getArgs >>= readIO . head
          printf "%.9f\n" $ runST (do state <- initialState
                                      offsetMomentum state
                                      energy state)
          printf "%.9f\n" $ runST (do state <- initialState
                                      offsetMomentum state
                                      energy state
                                      advance n state
                                      energy state)
 
advance :: Int -> PhaseSpace s -> ST s ()
advance 0 _   = return ()
advance n sys = do kick sys
                   drift sys
                   advance (n-1) sys
 
readAll (PS m x y z vx vy vz) i = do (x,y,z) <- liftM3 (,,) (x!i)  (y!i) (z!i)
                                     (vx,vy,vz) <- liftM3 (,,) (vx!i) (vy!i) (vz!i)
                                     return (x,y,z,vx,vy,vz)
{-# INLINE readAll #-}
 
kick ps@(PS m rx ry rz vx vy vz) = outer 0 where
    outer i | i >= size = return ()
            | otherwise = inner (i+1) >> outer (i+1) where
                inner j | j >= size = return ()
                        | otherwise = do
                            (mi,mj) <- liftM2 (,) (m!i) (m!j)
                            (rxi,ryi,rzi,vxi,vyi,vzi) <- readAll ps i
                            (rxj,ryj,rzj,vxj,vyj,vzj) <- readAll ps j
                            let (dx,dy,dz) = (rxi-rxj, ryi-ryj, rzi-rzj)
                                dist2 = dx*dx + dy*dy + dz*dz
                                mag = dt / (dist2 * sqrt dist2)
                            writeArray vx i (vxi - dx*mj*mag)
                            writeArray vy i (vyi - dy*mj*mag)
                            writeArray vz i (vzi - dz*mj*mag)
                            writeArray vx j (vxj + dx*mi*mag)
                            writeArray vy j (vyj + dy*mi*mag)
                            writeArray vz j (vzj + dz*mi*mag)
                            inner (j+1)
 
drift ps@(PS _ rxs rys rzs _ _ _) = loop 0 where
    loop i | i >= size = return ()
           | otherwise = do (x,y,z,vx,vy,vz) <- readAll ps i
                            writeArray rxs i (x + dt*vx)
                            writeArray rys i (y + dt*vy)
                            writeArray rzs i (z + dt*vz)
                            loop (i+1)
 
energy sys = liftM2 (+) (kineticEnergy sys) (potentialEnergy sys)
 
kineticEnergy (PS ms _ _ _ vxs vys vzs) = loop 0 0 where
    loop i accum | i >= size = return (0.5 * accum)
                 | otherwise = do
                        (m,vx,vy,vz) <- liftM4 (,,,) (ms!i) (vxs!i) (vys!i) (vzs!i)
                        let v2 = vx*vx + vy*vy + vz*vz
                        loop (i+1) $! (accum + m*v2)
 
potentialEnergy (PS ms rxs rys rzs _ _ _) = outer 0 0 where
    outer i a | i >= size = return a
              | otherwise = inner i (i+1) a >>= outer (i+1)
    inner i j a | j >= size = return a
                | otherwise = do
                    (mi,xi,yi,zi) <- liftM4 (,,,) (ms!i) (rxs!i) (rys!i) (rzs!i)
                    (mj,xj,yj,zj) <- liftM4 (,,,) (ms!j) (rxs!j) (rys!j) (rzs!j)
                    let (dx,dy,dz) = (xi-xj, yi-yj, zi-zj)
                        dist = sqrt (dx*dx + dy*dy + dz*dz)
                    inner i (j+1) $! (a - mi*mj/dist)
 
offsetMomentum (PS ms rxs rys rzs vxs vys vzs) = do
    (px,py,pz) <- calcMomentum 0 (0,0,0)
    (mSun,vSunx,vSuny,vSunz) <- liftM4 (,,,) (ms!0) (vxs!0) (vys!0) (vzs!0)
    writeArray vxs 0 (vSunx - px/mSun)
    writeArray vys 0 (vSuny - py/mSun)
    writeArray vzs 0 (vSunz - pz/mSun)
  where calcMomentum i (px,py,pz) | i >= size = return (px,py,pz)
                                  | otherwise = do
                                        (m,vx,vy,vz) <- liftM4 (,,,) (ms!i) (vxs!i) (vys!i) (vzs!i)
                                        calcMomentum (i+1) $! (px+vx*m,py+vy*m,pz+vz*m)
 
initialState = do m <- mkSTUArray masses
                  x <- mkSTUArray positionXs
                  y <- mkSTUArray positionYs
                  z <- mkSTUArray positionZs
                  vx <- mkSTUArray velocityXs
                  vy <- mkSTUArray velocityYs
                  vz <- mkSTUArray velocityZs
                  return (PS m x y z vx vy vz)
 
mkSTUArray = newListArray (0,size-1) :: [Double] -> ST s (STUArray s Int Double)
 
masses = map (4*pi*pi*) [1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05]
 
positionXs = [0, 4.84143144246472090e+00, 8.34336671824457987e+00, 1.28943695621391310e+01, 1.53796971148509165e+01]
 
positionYs = [0, (-1.16032004402742839e+00), 4.12479856412430479e+00, (-1.51111514016986312e+01), (-2.59193146099879641e+01)]
 
positionZs = [0, (-1.03622044471123109e-01), (-4.03523417114321381e-01), (-2.23307578892655734e-01), 1.79258772950371181e-01]
 
velocityXs = map (365.24*) [0, 1.66007664274403694e-03, (-2.76742510726862411e-03), 2.96460137564761618e-03, 2.68067772490389322e-03]
 
velocityYs = map (365.24*) [0, 7.69901118419740425e-03, 4.99852801234917238e-03, 2.37847173959480950e-03, 1.62824170038242295e-03]
 
velocityZs = map (365.24*) [0, (-6.90460016972063023e-05), 2.30417297573763929e-05, (-2.96589568540237556e-05), (-9.51592254519715870e-05)]

5 An STUArray Version (Joel Koerwer)

This one is slightly faster than chris+dons on my machine. The "proposed entry" has ben submitted, so I pushed it and its associated benchmarks down.

I get a speed up from the STUArray versus the dons+chris version on a Powerbook G4. Don+chris is 4.38 s user time, Joel's is 3.74 s user time (17% faster). This makes sense to me, since it does not need to shiftL and .|. the indices. I was cleaning it up without making it slower and I accidentally made it faster (I think by hoisting dt and size). -- ChrisKuklewicz

--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Written by Joel Koerwer.
-- Uses 7 STUArrays for the state.
--
-- Performance is very similar to the IOUArray version of
-- Chris Kuklewicz and Don Stewart. This one is slightly faster
-- on my machine at n=5,000,000 (15 s vs 17 s).
--
-- -O3 -optc-O3 -fexcess-precision -funbox-strict-fields
--
-- -optc-ffast-math doesn't speed things up for me.
-- Well, neither does -optc-O3, but that may just be here.
--
 
 
import Control.Monad.ST (ST, runST)
import Data.Array.ST    (STUArray, newListArray)
import Data.Array.Base  (unsafeRead, unsafeWrite)
 
import System      (getArgs)
import Text.Printf (printf)
 
readArray :: STUArray s Int Double -> Int -> ST s Double
readArray = unsafeRead
 
writeArray :: STUArray s Int Double -> Int -> Double -> ST s ()
writeArray = unsafeWrite
 
data PhaseSpace s = PS {size :: !Int,
                        ms,rxs,rys,rzs,vxs,vys,vzs :: !(STUArray s Int Double)}
 
advance :: Int -> Double -> PhaseSpace s -> ST s ()
advance 0 _  _   = return ()
advance n dt sys = do
    kick dt sys
    drift dt sys
    advance (n-1) dt sys
 
kick :: Double -> PhaseSpace s -> ST s ()
kick dt (PS size m rx ry rz vx vy vz) = outer 0
    where
    outer i | i >= size = return ()
            | otherwise = inner (i+1) >> outer (i+1)
                where
                inner j | j >= size = return ()
                        | otherwise = do
                            mi <- readArray m i
                            mj <- readArray m j
                            rxi <- readArray rx i
                            rxj <- readArray rx j
                            ryi <- readArray ry i
                            ryj <- readArray ry j
                            rzi <- readArray rz i
                            rzj <- readArray rz j
                            vxi <- readArray vx i
                            vxj <- readArray vx j
                            vyi <- readArray vy i
                            vyj <- readArray vy j
                            vzi <- readArray vz i
                            vzj <- readArray vz j
                            let dx = rxi-rxj
                                dy = ryi-ryj
                                dz = rzi-rzj
                                dist2 = dx*dx + dy*dy + dz*dz
                                dist = sqrt dist2
                                mag = dt / (dist*dist2)
                            writeArray vx i (vxi - dx*mj*mag)
                            writeArray vy i (vyi - dy*mj*mag)
                            writeArray vz i (vzi - dz*mj*mag)
                            writeArray vx j (vxj + dx*mi*mag)
                            writeArray vy j (vyj + dy*mi*mag)
                            writeArray vz j (vzj + dz*mi*mag)
                            inner (j+1)
 
drift :: Double -> PhaseSpace s -> ST s ()
drift dt (PS size _ rxs rys rzs vxs vys vzs) = loop 0
    where
    loop i | i >= size = return ()
           | otherwise = do
                x <- readArray rxs i
                y <- readArray rys i
                z <- readArray rzs i
                vx <- readArray vxs i
                vy <- readArray vys i
                vz <- readArray vzs i
                writeArray rxs i (x + dt*vx)
                writeArray rys i (y + dt*vy)
                writeArray rzs i (z + dt*vz)
                loop (i+1)
 
energy :: PhaseSpace s -> ST s Double
energy sys = do t <- kineticEnergy sys
                u <- potentialEnergy sys
                return (t+u)
 
kineticEnergy :: PhaseSpace s -> ST s Double
kineticEnergy (PS size ms _ _ _ vxs vys vzs) = loop 0 0
    where
    loop i accum | i >= size = return (0.5 * accum)
                 | otherwise = do
                        m <- readArray ms i
                        vx <- readArray vxs i
                        vy <- readArray vys i
                        vz <- readArray vzs i
                        let v2 = vx*vx + vy*vy + vz*vz
                        loop (i+1) (accum + m*v2)
 
potentialEnergy :: PhaseSpace s -> ST s Double
potentialEnergy (PS size ms rxs rys rzs _ _ _) = outer 0 0
    where
    outer i a | i >= size = return a
              | otherwise = inner i (i+1) a >>= outer (i+1)
 
    inner i j a | j >= size = return a
                | otherwise = do
                    mi <- readArray ms i
                    mj <- readArray ms j
                    xi <- readArray rxs i
                    xj <- readArray rxs j
                    yi <- readArray rys i
                    yj <- readArray rys j
                    zi <- readArray rzs i
                    zj <- readArray rzs j
                    let dx = xi-xj
                        dy = yi-yj
                        dz = zi-zj
                        dist = sqrt (dx*dx + dy*dy + dz*dz)
                    inner i (j+1) (a - mi*mj/dist)
 
 
offsetMomentum :: PhaseSpace s -> ST s ()
offsetMomentum (PS size ms rxs rys rzs vxs vys vzs) = do
    (px,py,pz) <- calcMomentum 0 (0,0,0)
    mSun <- readArray ms 0
    vSunx <- readArray vxs 0
    vSuny <- readArray vys 0
    vSunz <- readArray vzs 0
    writeArray vxs 0 (vSunx - px/mSun)
    writeArray vys 0 (vSuny - py/mSun)
    writeArray vzs 0 (vSunz - pz/mSun)
        where
        calcMomentum i (px,py,pz) | i >= size = return (px,py,pz)
                                  | otherwise = do
                                        m <- readArray ms i
                                        vx <- readArray vxs i
                                        vy <- readArray vys i
                                        vz <- readArray vzs i
                                        let px' = px + vx*m
                                            py' = py + vy*m
                                            pz' = pz + vz*m
                                        calcMomentum (i+1) (px',py',pz')
 
main = do
    n <- getArgs >>= readIO . head
    printf "%.9f\n" $ runST (do state <- initialState
                                offsetMomentum state
                                energy state)
 
    printf "%.9f\n" $ runST (do state <- initialState
                                offsetMomentum state
                                advance n 0.01 state
                                energy state)
 
initialState :: ST s (PhaseSpace s)
initialState = do
        m <- mkSTUArray masses
        x <- mkSTUArray positionXs
        y <- mkSTUArray positionYs
        z <- mkSTUArray positionZs
        vx <- mkSTUArray velocityXs
        vy <- mkSTUArray velocityYs
        vz <- mkSTUArray velocityZs
        return (PS 5 m x y z vx vy vz)
 
mkSTUArray :: [Double] -> ST s (STUArray s Int Double)
mkSTUArray = newListArray (0,4)
 
masses = map (4*pi*pi*) [1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05]
 
positionXs = [0, 4.84143144246472090e+00, 8.34336671824457987e+00, 1.28943695621391310e+01, 1.53796971148509165e+01]
 
positionYs = [0, (-1.16032004402742839e+00), 4.12479856412430479e+00, (-1.51111514016986312e+01), (-2.59193146099879641e+01)]
 
positionZs = [0, (-1.03622044471123109e-01), (-4.03523417114321381e-01), (-2.23307578892655734e-01), 1.79258772950371181e-01]
 
velocityXs = map (365.24*) [0, 1.66007664274403694e-03, (-2.76742510726862411e-03), 2.96460137564761618e-03, 2.68067772490389322e-03]
 
velocityYs = map (365.24*) [0, 7.69901118419740425e-03, 4.99852801234917238e-03, 2.37847173959480950e-03, 1.62824170038242295e-03]
 
velocityZs = map (365.24*) [0, (-6.90460016972063023e-05), 2.30417297573763929e-05, (-2.96589568540237556e-05), (-9.51592254519715870e-05)]


6 Proposed Entry (dons+chris)

A carefully tuned version of Chris' breakthrough entry. Runs faster than unoptimised gcc C. Be sure not to use -optc-O3 on linux, or badness may ensue. The OPTIONS pragma passes -O to gcc last, so it will override any other -O args to gcc passed on the cmd line.

{-# OPTIONS -optc-O #-}
--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Written in Haskell by Chris Kuklewicz, further tweaks by Don Stewart
--
-- -O2 -optc-O -fglasgow-exts -fexcess-precision -optc-ffast-math
--
-- -optc-O3 cannot be used, as at least one version of gcc miscompiles this program
--
 
import System
import System.IO.Unsafe
import Monad
import Data.Bits
import Data.List
import Data.Array.IO
import Data.Array.Base(unsafeRead,unsafeWrite)
import Text.Printf
 
default (Int)
 
main = do n <- getArgs >>= readIO . head
          offsetMomentum
          energy >>= printf "%.9f\n"
          advance n
          energy >>= printf "%.9f\n"
 
-- Offsets for each field
x = 0; y = 1; z = 2; vx= 3; vy= 4; vz= 5; m = 6
 
type Bodies = IOUArray Int Double
 
b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData)
{-# NOINLINE b #-}
 
set = unsafeWrite b
{-# INLINE set #-}
 
resetB = mapM_ (uncurry set) (zip [0..] bodiesData)
 
-- sun jupiter saturn uranus neptune
-- sun starts at center at rest
bodiesData =  concat [ mkB 1 0 0 0 0 0 0
   ,mkB 9.54791938424326609e-04
        4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
      ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
   ,mkB 2.85885980666130812e-04
        8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
      (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
   ,mkB 4.36624404335156298e-05
        1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
      ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
   ,mkB 5.15138902046611451e-05
        1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
      ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
 
mkB m x y z vx vy vz = 
    [x, y, z, vx*days_per_year,vy*days_per_year, vz*days_per_year, m*solar_mass, 0]
 
solar_mass    = 4 * pi * pi
days_per_year = 365.24
nbodies       = 4 -- that is 0 to 4
 
mass i = unsafeRead b (massOffset .|. (shiftL i 3)) where massOffset = 6
 
-- Give the sun a small velocity so the total momentum of all bodies totals to zero
offsetMomentum :: IO ()
offsetMomentum = do sm <- mass 0
                    let act i = mass i >>= \m -> addScaled 3 (-m/sm) (3 .|. (shiftL i 3))
                    mapM_ act [1..nbodies]
 
-- Total all kineticE and potentialE
energy = loop 0 0
  where loop i e | i > nbodies = return e
                 | otherwise   = do ke <- kineticE i
                                    (loop' (i+1) i $! (e+ke)) >>= loop (i+1)
        loop' j i e | j > nbodies = return e
                    | otherwise   = do pe <- potentialE i j
                                       loop' (j+1) i $! (e + pe)
 
kineticE i = let i' = (.|. (shiftL i 3))
             in do m <- mass i
                   vx <- unsafeRead b (i' vx)
                   vy <- unsafeRead b (i' vy)
                   vz <- unsafeRead b (i' vz)
                   return $! 0.5 * m * (vx*vx + vy*vy + vz*vz)
 
potentialE i j = do
    m1 <- mass i
    m2 <- mass j
    let (i', j') = ((.|. (shiftL i 3)), (.|. (shiftL j 3)))
    dx <- liftM2 (-) (unsafeRead b (i' x)) (unsafeRead b (j' x))
    dy <- liftM2 (-) (unsafeRead b (i' y)) (unsafeRead b (j' y))
    dz <- liftM2 (-) (unsafeRead b (i' z)) (unsafeRead b (j' z))
    return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz))
 
addScaled i a j | i `seq` a `seq` j `seq` False = undefined -- stricitfy
addScaled i a j = do set i1 =<< liftM2 scale (unsafeRead b i1) (unsafeRead b j1)
                     set i2 =<< liftM2 scale (unsafeRead b i2) (unsafeRead b j2)
                     set i3 =<< liftM2 scale (unsafeRead b i3) (unsafeRead b j3)
    where scale old new = old + a * new
          i1 = i; i2 = succ i1; i3 = succ i2;
          j1 = j; j2 = succ j1; j3 = succ j2;
 
addScaled3 i a jx jy jz | i `seq` a `seq` jx `seq` jy `seq` jz `seq` False = undefined
addScaled3 i a jx jy jz = do set i1 =<< liftM (scale jx) (unsafeRead b i1)
                             set i2 =<< liftM (scale jy) (unsafeRead b i2)
                             set i3 =<< liftM (scale jz) (unsafeRead b i3)
    where scale new old = a * new + old
          i1 = i; i2 = succ i1; i3 = succ i2;
 
-- This is the main code. Essentially all the time is spent here
advance n = when (n > 0) $ updateVel 0 >> advance (pred n)
 
  where updateVel i = when (i <= nbodies) $ do
            let i' = (.|. shift i 3)
            im  <- unsafeRead b (i' m)
            ix  <- unsafeRead b (i' x)
            iy  <- unsafeRead b (i' y)
            iz  <- unsafeRead b (i' z)
            ivx <- unsafeRead b (i' vx)
            ivy <- unsafeRead b (i' vy)
            ivz <- unsafeRead b (i' vz)
 
            let updateVel' ivx ivy ivz j =  ivx `seq` ivy `seq` ivz `seq`
                  if j > nbodies then do
                    unsafeWrite b (i' vx) ivx
                    unsafeWrite b (i' vy) ivy
                    unsafeWrite b (i' vz) ivz
                  else do
                    let j' = (.|. shiftL j 3)
                    jm <- unsafeRead b (j' m)
                    dx <- liftM (ix-) (unsafeRead b (j' x))
                    dy <- liftM (iy-) (unsafeRead b (j' y))
                    dz <- liftM (iz-) (unsafeRead b (j' z))
                    let distance = sqrt (dx*dx+dy*dy+dz*dz)
                        mag = 0.01 / (distance * distance * distance)
                    addScaled3 (3 .|. (shiftL j 3)) ( im*mag) dx dy dz
                    let a = -jm*mag
                        ivx' = ivx+a*dx
                        ivy' = ivy+a*dy
                        ivz' = ivz+a*dz
                    updateVel' ivx' ivy' ivz' $! (j+1)
 
            updateVel' ivx ivy ivz $! (i+1)
            addScaled (shiftL i 3) 0.01 (3 .|. (shiftL i 3))
            updateVel (i+1)

7 Improved entry (chris)

I made it go faster! By a factor of 2.5, which is not closing much of the gap. But any speed improvement is a breakthrough at this point. Hopefully someone else can see why it is faster and speed it up further. I am just tweaking things at random now.

All the n-body data (7 doubles: mass x y z vx vy vz) is put into a single IOUArray Int Double. Each body is offset by 8 indices, so there is a padding of 1 double to get an alignment of 8. This let me calculate the offset of each double with {{{shiftL _ 3}}} and . The array itself is a global variable.

--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Rewritten in Haskell from C by Don Stewart
-- Rewritten again by Chris Kuklewicz
--
-- -O2 -optc-O3 -fglasgow-exts -fexcess-precision -optc-ffast-math
--
-- This code actually runs faster then the previous entry!
--
import Debug.Trace
 
import System
import System.IO.Unsafe
import Monad
import Data.Bits
import Data.List
import Data.Array.MArray
import Data.Array.IO
import Data.Array.Base(unsafeRead,unsafeWrite)
import Text.Printf
import Data.IORef
 
default(Int)
 
main = do
    args <- getArgs
    let n = if null args then 1000000 else read ( head args )
    offsetMomentum
    energy >>= printf "%.9f\n"
    advance n
    energy >>= printf "%.9f\n" 
 
-- Offsets for each field
x = 0
y = 1
z = 2
vx= 3
vy= 4
vz= 5
m = 6
 
type Bodies = IOUArray Int Double
 
{-# NOINLINE b #-}
b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData)
get = unsafeRead b
set = unsafeWrite b
resetB = mapM_ (uncurry set) (zip [0..] bodiesData)
 
-- sun jupiter saturn uranus neptune
-- sun starts at center at rest
bodiesData =  concat [ mkB 1 0 0 0 0 0 0
   ,mkB 9.54791938424326609e-04 
        4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
      ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
   ,mkB 2.85885980666130812e-04
        8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
      (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
   ,mkB 4.36624404335156298e-05
        1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
      ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
   ,mkB 5.15138902046611451e-05
        1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
      ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)]
 
mkB m x y z vx vy vz = [x, y, z, vx*days_per_year, 
  vy*days_per_year, vz*days_per_year, m*solar_mass, 0]
 
solar_mass    = 4 * pi * pi
days_per_year = 365.24
nbodies = 4 -- that is 0 to 4
 
sh i = shiftL i 3 -- multiply by 8
pos i = sh i
vel i = velOffset .|. sh i where velOffset = 3
mass i = get (massOffset .|. sh i) where massOffset = 6
 
-- Give the sun a small velocity so the total momentum of all bodies totals to zero
offsetMomentum :: IO ()
offsetMomentum = do  sm <- mass 0
                     let sv = vel 0
                         act i = mass i >>= \m -> addScaled sv (-m/sm) (vel i)
                     mapM_ act [1..nbodies]
 
-- Total all kineticE and potentialE
energy :: IO Double
energy = loop 0 0
  where loop i e | i > nbodies = return e
                 | otherwise   = do ke <- kineticE i
                                    (loop' (i+1) i $! (e+ke)) >>= loop (i+1) 
        loop' j i e | j > nbodies = return e
                    | otherwise   = do pe <- potentialE i j
                                       loop' (j+1) i $! (e + pe)
 
kineticE i = let i' = (.|. sh i)
             in do m <- mass i
                   vx <- get (i' vx)
                   vy <- get (i' vy)
                   vz <- get (i' vz)
                   return $! 0.5 * m * (vx*vx + vy*vy + vz*vz)
 
potentialE i j = do
  m1 <- mass i
  m2 <- mass j
  let i' = (.|. (sh i))
      j' = (.|. (sh j))
  dx <- liftM2 (-) (get (i' x)) (get (j' x))
  dy <- liftM2 (-) (get (i' y)) (get (j' y))
  dz <- liftM2 (-) (get (i' z)) (get (j' z))
  return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz))
 
dt = 0.01
 
addScaled i a j = let scale old new = old + a * new
                      i1=i; i2=succ i1; i3=succ i2;
                      j1=j; j2=succ j1; j3=succ j2;
                  in do set i1 =<< liftM2 scale (get i1) (get j1)
                        set i2 =<< liftM2 scale (get i2) (get j2)
                        set i3 =<< liftM2 scale (get i3) (get j3)
 
addScaled3 i a jx jy jz = do
  let scale new old = a * new + old
      i1=i; i2=succ i1; i3=succ i2;
  set i1 =<< liftM (scale jx) (get i1)
  set i2 =<< liftM (scale jy) (get i2)
  set i3 =<< liftM (scale jz) (get i3)
 
-- This is the main code. Essentially all the time is spent here
advance n = when (n>0) $ do
  let {-# NOINLINE updateVel #-}
      updateVel i = when (i <= nbodies) $ do 
        let i' = (.|. sh i)
        im <- get (i' m)
        ix <- get (i' x)
        iy <- get (i' y)
        iz <- get (i' z)
        ivx <- get (i' vx)
        ivy <- get (i' vy)
        ivz <- get (i' vz)
        let {-# INLINE updateVel' #-}
            updateVel' ivx ivy ivz j =  ivx `seq` ivy `seq` ivz `seq` 
              if j > nbodies then do
                set (i' vx) ivx
                set (i' vy) ivy
                set (i' vz) ivz
              else do
                let j' = (.|. sh j)
                jm <- get (j' m)
                dx <- liftM (ix-) (get (j' x))
                dy <- liftM (iy-) (get (j' y))
                dz <- liftM (iz-) (get (j' z))
                let distance = sqrt (dx*dx+dy*dy+dz*dz)
                    mag = dt / (distance * distance * distance)
                addScaled3 (vel j) ( im*mag) dx dy dz
                let a = -jm*mag
                    ivx' = ivx+a*dx
                    ivy' = ivy+a*dy
                    ivz' = ivz+a*dz
                updateVel' ivx' ivy' ivz' $! (j+1)
        updateVel' ivx ivy ivz $! (i+1)
        addScaled (pos i) dt (vel i)
        updateVel (i+1)
  updateVel 0
  advance (pred n)

8 Old attempt by ChrisKuklewicz

I heavily transformed Don's code. This now runs almost exactly as fast as Einar's code on my machine. So I am niether happy nor sad.

Note that this creates the {{{data MutVec = V ...}}} and {{{data Body = B}}} all at the beginning, puts them into an array, and never constructs any new ones. From them on it just looks up {{{IORef Double}}}'s and works on those.

I cannot find any Haskell idioms for making this run 10x faster, but other languages manage to do it, such as OCaml.

Would peeking and poking unboxed Doubles be the way to go?

--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Rewritten in Haskell from C by Don Stewart
-- Rewritten again by Chris Kuklewicz
--
-- -O2  -optc-O3 -funbox-strict-fields -fglasgow-exts -fexcess-precision -fasm
--
-- Runs at the same speed as the Haskell#3 open mutable records code
-- by Einar.
--
-- Advance is where ALL the work is done and where any optimization must go.
--
import System
import Monad
import Data.List
import Data.Array.Array
import Text.Printf
import Data.IORef
 
default()
 
type Bodies = Array Int Body
data MutVec = V { x, y, z :: !(IORef Double) }
data Body = B { mass :: !Double, pos,vel :: !MutVec } 
 
newV x y z =  x `seq` y `seq` z `seq` do liftM3 V (newIORef x) (newIORef y) (newIORef z)
 
mkB m x y z vx vy vz = liftM2 (B (m*solar_mass)) (newV x y z) 
  (newV (vx*days_per_year) (vy*days_per_year) (vz*days_per_year))
 
{-# INLINE mIORef #-}
mIORef r f = readIORef r >>= ( ($!) (writeIORef r)  ) . f 
{-# INLINE minus #-}
minus x = (\y -> y-x)
{-# INLINE do2 #-}
do2 fm m1 m2 = do x1 <- m1; x2 <- m2; fm x1 x2
 
{-# INLINE addScaled #-}
addScaled v1@(V x1 y1 z1) a v2@(V x2 y2 z2) = do
  (mIORef x1).(+).(*a) =<< readIORef x2
  (mIORef y1).(+).(*a) =<< readIORef y2
  (mIORef z1).(+).(*a) =<< readIORef z2
 
{-# INLINE setDiff #-}
setDiff d@(V dx dy dz) v1@(V x1 y1 z1) v2@(V x2 y2 z2) = do
  do2 (\a b->writeIORef dx $! (a-b)) (readIORef x1)  (readIORef x2)
  do2 (\a b->writeIORef dy $! (a-b)) (readIORef y1)  (readIORef y2)
  do2 (\a b->writeIORef dz $! (a-b)) (readIORef z1)  (readIORef z2)
  return d
 
{-# INLINE kineticE #-}
kineticE mass (V x y z) = do vx <- readIORef x ;  vy <- readIORef y ;  vz <- readIORef z
                             return $! 0.5 * mass * (vx*vx + vy*vy + vz*vz)
 
{-# INLINE magV #-}
magV (V x y z) = do
  liftM3 (\a b c->sqrt (a*a+b*b+c*c)) (readIORef x) (readIORef y) (readIORef z) >>= (return $!)
 
{-# INLINE potentialE #-}
potentialE mass1 (V x1 y1 z1) mass2 (V x2 y2 z2) = do
  dx <- liftM2 (-) (readIORef x1) (readIORef x2)
  dy <- liftM2 (-) (readIORef y1) (readIORef y2)
  dz <- liftM2 (-) (readIORef z1) (readIORef z2)
  return $! ((-1)*mass1*mass2/sqrt (dx*dx + dy*dy + dz*dz))
 
mkBodies :: IO Bodies
mkBodies = liftM (listArray (0,nbodies-1)) $ sequence bodiesData
 
-- sun jupiter saturn uranus neptune
bodiesData = [ mkB 1 0 0 0 0 0 0
  ,mkB 9.54791938424326609e-04 
       4.84143144246472090e+00  (-1.16032004402742839e+00) (-1.03622044471123109e-01)
     ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05)
  ,mkB 2.85885980666130812e-04
       8.34336671824457987e+00    4.12479856412430479e+00  (-4.03523417114321381e-01)
     (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05)
  ,mkB 4.36624404335156298e-05
       1.28943695621391310e+01  (-1.51111514016986312e+01) (-2.23307578892655734e-01)
     ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05)
  ,mkB 5.15138902046611451e-05
       1.53796971148509165e+01  (-2.59193146099879641e+01)   1.79258772950371181e-01
     ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05) ]
 
solar_mass    = 4 * pi * pi
days_per_year = 365.24
nbodies = length bodiesData
 
offsetMomentum :: Bodies -> IO ()
offsetMomentum arr = sequence_ [ addScaled sv (-m/sm) v | (B m _ v) <- tail (elems arr) ]
  where (B sm sp sv) = (arr ! 0)
 
energy :: Int -> Bodies -> IO Double
energy nbodies arr = loop 0 0
    where loop i e | i >= nbodies = return e
                   | otherwise    = do let (B mb bp bv) = arr ! i
                                       ke <- kineticE mb bv
                                       (loop' (i+1) bp mb $! (e+ke)) >>= loop (i+1) 
          loop' j bp mb e | j >= nbodies = return e
                          | otherwise    = do let (B mb2 bp2 _) = arr ! j
                                              pe <- potentialE mb bp mb2 bp2
                                              loop' (j+1) bp mb (e + pe)
 
main = do
    (n::Int) <- getArgs >>= readIO . head
    bodies <- mkBodies
    dxyz <- newV 0 0 0
    let dt = 0.01
        advance = do
          let updateVel i = when (i < nbodies) $ do 
                updateVel' (bodies ! i) (i+1) 
                updateVel (i+1)
              updateVel' b1@(B mb1 bp1 bv1) j = if j >= nbodies then return () else do
                let (B mb2 bp2 bv2)  = bodies ! j
                setDiff dxyz bp1 bp2
                distance <- magV dxyz
                let mag = dt / (distance * distance * distance)
                addScaled bv2 (mb1*mag) dxyz
                addScaled bv1 (-mb2*mag) dxyz
                updateVel' b1 (j+1) 
              updatePos i = when (i < nbodies) $ do 
                let (B _ bp bv) = bodies ! i
                addScaled bp dt bv
                updatePos (i+1)
          updateVel 0
          updatePos 0
    offsetMomentum bodies
    energy nbodies bodies >>= printf "%.9f\n"
    sequence_ . replicate n $ advance
    energy nbodies bodies >>= printf "%.9f\n"

9 Joel Koerwer's nth attempt

I suppose the comments say it all.

-- Joel Koerwer's attempt at the Computer Shootout nbody problem.
-- Not as fast as Einar Kartunnen's Open Mutable Records implementation,
-- but I like it!
-- I get about double the runtime of Einar's code.
 
import System (getArgs)
import Text.Printf (printf)
 
data Vector3 = V3 !Double !Double !Double deriving (Show, Eq)
 
instance Num Vector3 where
    (V3 x y z) + (V3 u v w) = V3 (x+u) (y+v) (z+w)
    (V3 x y z) - (V3 u v w) = V3 (x-u) (y-v) (z-w)
    (V3 x y z) * (V3 u v w) = V3 (x*u) (y*v) (z*w)
    fromInteger n = V3 (fromInteger n) (fromInteger n) (fromInteger n)
    abs (V3 x y z) = V3 (abs x) (abs y) (abs z)
    signum (V3 x y z) = V3 (signum x) (signum y) (signum z)
 
-- Mnemonically read "scalar times vector".
(.*|) :: Double -> Vector3 -> Vector3
a .*| (V3 x y z) = V3 (a*x) (a*y) (a*z)
 
-- Similarly for "vector divided by scalar".
(|/.) :: Vector3 -> Double -> Vector3
(V3 x y z) |/. a = V3 (x/a) (y/a) (z/a)
 
dot :: Vector3 -> Vector3 -> Double
(V3 x y z) `dot` (V3 u v w) = x*u + y*v + z*w
 
norm2 v = v `dot` v
norm = sqrt . norm2
 
type PhaseSpace = ([Vector3],[Vector3])
 
advance :: PhaseSpace -> PhaseSpace
advance sys@(rs,vs) = let
    vs' = zipWith (+) vs $ getDeltaVs sys
    rs' = zipWith ((+) . (dt .*| )) vs' rs
    in
    (rs', vs')
 
getDeltaVs :: PhaseSpace -> [Vector3]
getDeltaVs sys@(rs,_) = let
    timesMdT = zipWith (.*|) dtms
    mags = getMags rs
    in
    map (sum . timesMdT) mags
 
 
-- getMags does the "real work". Given a list of position vectors [r0,r1,..,rN]
-- return the "magnitude matrix"
--
-- [[  0   m01  m02 .. m0N],
--  [-m01   0   m12 .. m1N],
--  [-m02 -m12   0  .. m2N],
--  [  :    :      .    : ],
--  [  :    :        .  : ],
--  [-m0N -m1N -m2N ..  0 ]]
--
-- where mij = -mji = (rj - ri) / |rj - ri|^3.
--
-- This method is faster than calulating all differences
-- seperately, but not by a whole lot.
getMags :: [Vector3] -> [[Vector3]]
getMags [] = []
getMags (r:rs) = let
    mkMag r' = let d = r' - r in d |/. (norm d * norm2 d)
    newMags = map mkMag rs
    minusMags = map negate newMags
    in
    (0 : newMags) : zipWith (:) minusMags (getMags rs)
 
energy :: PhaseSpace -> Double
energy sys = kineticEnergy sys + potentialEnergy sys
 
-- T = 0.5 * sum_i mi * vi^2
kineticEnergy :: PhaseSpace -> Double
kineticEnergy (_,vs) = 0.5 * (sum $ zipWith (*) ms (map norm2 vs))
 
-- U = sum_{i/=j} -0.5*mi*mj/|ri-rj|
potentialEnergy :: PhaseSpace -> Double
potentialEnergy (rs,_) = let a = -0.5 :: Double in
    (a *) . sum $ zipWith ((*) . sum . zipWith (*) ms) (invDists rs) ms
 
-- Calculate the inverse distances between positions [r0,..rN].
-- Similar to getMags, but here, mij = |mij| = 1 / |ri-rj|.
invDists :: [Vector3] -> [[Double]]
invDists [] = []
invDists (r:rs) = let
    newDs = map ((1/) . norm . (r -)) rs
    in
    (0 : newDs) : zipWith (:) newDs (invDists rs)
 
-- offset gives the Sun an initial velocity such that the entire system
-- has zero overall momentum.
offset :: PhaseSpace -> PhaseSpace
offset (rs, vs@(vSun:vPlanets)) = let
    pTotal = sum $ zipWith (.*|) ms vs
    mSun = head ms
    dv = pTotal |/. mSun
    in
    (rs, vSun-dv : vPlanets)
 
---------- A strict version of iterate for our loop -----------
seqList [] y = y
seqList (x:xs) y = x `seq` seqList xs y
 
seqTuple (x,y) z = seqList x $ seqList y z
 
iterate' f a = let b = f a in seqTuple b $ a : iterate' f b
---------------------------------------------------------------
 
main = let
    sys = offset initialPhaseSpace
    print9 = printf "%.9f\n"
    in
    do
        [n'] <- getArgs
        let n = read n' :: Int
        print9 (energy sys)
        print9 . energy $ iterate' advance sys !! n
 
-- I've stuck all the ugly constants and initial conditions at the end.
dt = 0.01
solarMass = 4*pi*pi
daysPerYear = 365.24
posns =[V3 0 0 0,
        V3 4.8414314424647209 (-1.16032004402742839) (-1.03622044471123109e-01),
        V3 8.34336671824457987 4.12479856412430479 (-4.03523417114321381e-01),
        V3 12.8943695621391310 (-15.1111514016986312) (-2.23307578892655734e-1),
        V3 15.3796971148509165 (-25.9193146099879641) (1.79258772950371181e-1)
        ]
vels' =[
    V3 0 0 0,
    V3 1.66007664274403694e-3 7.69901118419740425e-3 (-6.90460016972063023e-5),
    V3 (-2.76742510726862411e-3) 4.99852801234917238e-3 2.30417297573763929e-5,
    V3 2.96460137564761618e-3 2.37847173959480950e-3 (-2.96589568540237556e-5),
    V3 2.68067772490389322e-3 1.62824170038242295e-3 (-9.51592254519715870e-5)
    ]
vels = map (daysPerYear .*|) vels'
initialPhaseSpace = (posns, vels)
 
dtms = map (dt *) ms -- The masses are only used by themselves in energy, so go
                     -- ahead and precalculate m*dt.
ms = map (solarMass *) [1, 9.54791938424326609e-04, 2.85885980666130812e-04,
                        4.36624404335156298e-05, 5.15138902046611451e-05]

10 An imperative entry

Based on the C version. Too many slow updates on the array still.

{-# OPTIONS -O2 -optc-O3 -funbox-strict-fields -fglasgow-exts #-}
--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Rewritten in Haskell from C by Don Stewart
--
-- Advance is still too slow.
--
import System
import Monad
import Data.Array.IO
import Data.Array.Base
import Text.Printf
 
data Planet = P { x, y, z, vx, vy, vz :: !Double } deriving Show
 
mass = [solar_mass
       ,9.54791938424326609e-04 * solar_mass
       ,2.85885980666130812e-04 * solar_mass
       ,4.36624404335156298e-05 * solar_mass
       ,5.15138902046611451e-05 * solar_mass]
 
bodies =[P 0 0 0 0 0 0                                  -- sun
 
        -- jupiter
        ,P  4.84143144246472090e+00                     -- jupiter
          (-1.16032004402742839e+00)
          (-1.03622044471123109e-01)
          ( 1.66007664274403694e-03 * days_per_year)
          ( 7.69901118419740425e-03 * days_per_year)
          (-6.90460016972063023e-05 * days_per_year)
 
        ,P  8.34336671824457987e+00                     -- saturn
            4.12479856412430479e+00
          (-4.03523417114321381e-01)
          (-2.76742510726862411e-03 * days_per_year)
          ( 4.99852801234917238e-03 * days_per_year)
          ( 2.30417297573763929e-05 * days_per_year)
 
        ,P  1.28943695621391310e+01                     -- uranus
          (-1.51111514016986312e+01)
          (-2.23307578892655734e-01)
          ( 2.96460137564761618e-03 * days_per_year)
          ( 2.37847173959480950e-03 * days_per_year)
          (-2.96589568540237556e-05 * days_per_year)
 
        ,P  1.53796971148509165e+01                     -- neptune
          (-2.59193146099879641e+01)
            1.79258772950371181e-01
          ( 2.68067772490389322e-03 * days_per_year)
          ( 1.62824170038242295e-03 * days_per_year)
          (-9.51592254519715870e-05 * days_per_year) ]
 
solar_mass    = 4 * pi * pi :: Double
days_per_year = 365.24
nbodies = 5
 
offsetMomentum :: Int -> IOArray Int Planet -> IOUArray Int Double -> IO ()
offsetMomentum nbodies arr mass = do
    let loop i px py pz 
            | i >= nbodies = return (px,py,pz)
            | otherwise    = do
                p  <- unsafeRead arr i
                mp <- unsafeRead mass i
                let px' = px + vx p * mp
                    py' = py + vy p * mp
                    pz' = pz + vz p * mp
                loop (i+1) px' py' pz'
 
    (px,py,pz) <- loop 0 0.0 0.0 0.0
 
    p <- unsafeRead arr 0
    unsafeWrite arr 0 $! p { vx = - px / solar_mass
                           , vy = - py / solar_mass
                           , vz = - pz / solar_mass }
 
energy :: Int -> IOArray Int Planet -> IOUArray Int Double -> IO Double
energy nbodies arr mass = loop 0 (0.0 :: Double)
    where loop i e 
            | i >= nbodies = return e
            | otherwise    = do
                b <- unsafeRead arr i
                mb<- unsafeRead mass i
                let (x,y,z) = (vx b, vy b, vz b)
                    e' = e + 0.5 * mb * (x*x + y*y + z*z)
                e'' <- loop' (i+1) e' b mb
                loop (i+1) e''
 
          loop' j e b mb
            | j >= nbodies = return e
            | otherwise    = do
                b2 <- unsafeRead arr j
                mb2<- unsafeRead mass j
                let dx = x b - x b2
                    dy = y b - y b2
                    dz = z b - z b2
                    distance = sqrt $! dx * dx + dy * dy + dz * dz
                loop' (j+1) (e - ((mb * mb2) / distance)) b mb
 
advance :: Int -> IOArray Int Planet -> IOUArray Int Double -> Double -> IO ()
advance nbodies arr mass dt = do
    for0 0
    for3 0
    where
        for0 i = when (i < nbodies) $ do
            b  <- unsafeRead arr i
            mb <- unsafeRead mass i
            for1 (i+1) mb b >>= unsafeWrite arr i
            for0 (i+1)
 
        for1 j mb b = if j >= nbodies then return b else do
            b2 <- unsafeRead arr j 
            mb2<- unsafeRead mass j
            let dx = x b - x b2
                dy = y b - y b2
                dz = z b - z b2
                distance = sqrt $! dx * dx + dy * dy + dz * dz
                mag = dt / (distance * distance * distance)
                (m1,m2) = (mb2 * mag, mb * mag)
 
            unsafeWrite arr j $! b2 { vx = vx b2 + dx * m2
                                    , vy = vy b2 + dy * m2
                                    , vz = vz b2 + dz * m2 }
 
            for1 (j+1) mb $! b { vx = vx b  - dx * m1
                               , vy = vy b  - dy * m1
                               , vz = vz b  - dz * m1 }
 
        for3 i = when (i < nbodies) $ do
            b <- unsafeRead arr i
            unsafeWrite arr i $! b { x = x b + dt * vx b 
                                   , y = y b + dt * vy b 
                                   , z = z b + dt * vz b } 
            for3 (i+1)
 
main = do
    (n::Int) <- getArgs >>= readIO . head
    arr      <- newListArray (0,nbodies-1) bodies :: IO (IOArray Int Planet)
    massarr  <- newListArray (0,nbodies-1) mass   :: IO (IOUArray Int Double)
    offsetMomentum nbodies arr massarr
    energy nbodies arr massarr >>= printf "%.9f\n" 
    sequence_ . replicate n $ advance nbodies arr massarr 0.01
    energy nbodies arr massarr >>= printf "%.9f\n"


11 Original entry

This is the "Haskell GHC #3" entry, the fastest of the three.

-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
-- NBody based on Open Mutable Records
-- Contributed by Einar Karttunen
 
import Control.Monad.Reader
import Data.IORef
import System
import Text.Printf
 
-- This is a monad for open mutable records programming
newtype OO t r = OO (ReaderT t IO r) deriving(Monad, MonadReader t, MonadIO)
 
-- run a computation with a record (object)
with :: s -> OO s a -> OO b a
with this (OO c) = liftIO (runReaderT c this)
 
-- run an OO computation producing an IO value.
ooToIO :: OO s a -> IO a
ooToIO (OO c) = runReaderT c undefined
 
-- the plain record types
data a :.: r = RC !a !r
infixr :.:
data END = END
 
-- Next we define a field access method.
class Select r f t | r f -> t                 where (!) :: r -> f -> Ref t
instance Select (Field f t :.: r) f t        where (!) (RC (F x) _) _ = x
instance Select r f t => Select (a :.: r) f t where (!) (RC _ t) = (!) t
 
-- And finally the type of mutable fields.
type Ref a = IORef a
newtype Field name rtype = F (Ref rtype)
 
-- Next we define a way to construct record values.
infixr ##
(##) :: v -> OO s r -> OO s ((Field f v) :.: r)
(##) v r = do { h <- liftIO (newIORef v); t <- r; return (RC (F h) t) }
end = return END :: OO s END
 
-- Get the value of a field.
value :: Select s f t => f -> OO s t
value a  = do x <- asks (\s -> s!a)
              liftIO (readIORef x)
 
-- Or set the value of a field.
(<-:) :: Select s f t => f -> t -> OO s ()
a <-: b  = do x <- asks (\s -> s!a)
              liftIO (writeIORef x b)
 
-- And as a convenience add value to an double field.
(+=) :: Select s f V3 => f -> V3 -> OO s V3
a += b   = do x <- asks (\s -> s!a)
              val <- liftIO (readIORef x)
              let z = val+b
              z `seq` liftIO (writeIORef x z)
              return z
 
-- V3 vector like things.
data V3 = V3 !Double !Double !Double deriving(Show,Eq)
instance Num V3 where
  (V3 x1 y1 z1) + (V3 x2 y2 z2) = V3 (x1+x2) (y1+y2) (z1+z2)
  (V3 x1 y1 z1) - (V3 x2 y2 z2) = V3 (x1-x2) (y1-y2) (z1-z2)
  (V3 x1 y1 z1) * (V3 x2 y2 z2) = V3 (x1*x2) (y1*y2) (z1*z2)
  fromInteger o = let v = fromInteger o in V3 v v v
instance Fractional V3 where
  (V3 x1 y1 z1) / (V3 x2 y2 z2) = V3 (x1/x2) (y1/y2) (z1/z2)
 
from field obj = with obj (value field)
dv3 v = V3 v v v
v3len (V3 x y z) = sqrt (x*x + y*y + z*z)
 
{-# RULE "dv3/mult" forall a b. dv3 a * (V3 x y z) = V3 (a*x) (a*y) (a*z) #-}
 
-- Bodies
data Base = Base; data Diff = Diff; data Mass = Mass
type Body = Field Base V3 :.: Field Diff V3 :.: Field Mass Double :.: END
 
-- Create a new body
newBody x1 y1 z1 x2 y2 z2 mass =
  V3 x1 y1 z1 ## (dv3 year * V3 x2 y2 z2) ## (mass * solarMass) ## end :: OO s Body
 
offsetMomemtum bodies@(b:_) = foldM comp 0 bodies >>= (\d -> with b (Diff <-: (-d / dv3 solarMass)))
    where comp old body = do diff <- Diff `from` body
                             mass <- Mass `from` body
                             return (old + (dv3 mass * diff))
 
advance dt bodies = advanceLoop dt bodies >> mapM_ comp bodies
    where comp body = with body ((\diff -> Base += (dv3 dt * diff)) =<< value Diff)
 
advanceLoop :: Double -> [Body] -> OO s ()
advanceLoop dt []          = return ()
advanceLoop dt (b:bs)      = do
  bmass <- Mass `from` b
  bbase <- Base `from` b
  let inner []     = return ()
      inner (p:ps) = do diff <- (Base `from` p >>= \pbase -> return (bbase - pbase))
                        let dist = v3len diff
                        let magdiff = dv3 (dt / (dist * dist * dist)) * diff
                        pmass <- Mass `from` p
                        with b (Diff += (dv3 pmass * (-magdiff)))
                        with p (Diff += (dv3 bmass * magdiff))
                        inner ps
  inner bs >> advanceLoop dt bs
 
energy :: [Body] -> Double -> OO s Double
energy []     e = return e
energy (p:ps) e = do pdiff@(V3 x y z) <- Diff `from` p
                     pmass <- Mass `from` p
                     pbase <- Base `from` p
                     let e' = e + 0.5 * pmass * (x*x + y*y + z*z)
                     let inner []     e = return e
                         inner (j:js) e = do jmass <- Mass `from` j
                                             jbase <- Base `from` j
                                             inner js $! (e - ((pmass*jmass)/v3len(pbase - jbase)))
                     inner ps e' >>= energy ps
 
solarMass = 4*pi*pi :: Double
year = 365.24 :: Double
 
createBodies = do
  b0 <- newBody 0 0 0 0 0 0 1
  b1 <- newBody 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01)
               (1.66007664274403694e-03) (7.69901118419740425e-03) (-6.90460016972063023e-05)
               9.54791938424326609e-04
  b2 <- newBody 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01)
               (-2.76742510726862411e-03) (4.99852801234917238e-03) (2.30417297573763929e-05)
               (2.85885980666130812e-04)
  b3 <- newBody 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01)
               2.96460137564761618e-03 2.37847173959480950e-03 (-2.96589568540237556e-05)
               4.36624404335156298e-05
  b4 <- newBody 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01
               2.68067772490389322e-03 1.62824170038242295e-03 (-9.51592254519715870e-05)
               5.15138902046611451e-05
  return [b0,b1,b2,b3,b4]
 
main = do ~[n] <- getArgs
          (e1,e2) <- ooToIO (do bodies <- createBodies
                                offsetMomemtum bodies
                                e1 <- energy bodies 0
                                sequence_ $ replicate (read n) $ advance 0.01 bodies
                                e2 <- energy bodies 0
                                return (e1,e2))
          printf "%.9f\n%.9f\n" e1 e2


11.1 Smaller code

Anyone interested in seeing how small we can make the code? Here's my original attempt, which unfortunately suffers from a massive space-leak. I'd be interested in seeing if it is possible to have a succinct version without leaks.

import System(getArgs)
import Numeric
import Data.List
 
data Vec = V !Double !Double !Double deriving Show
instance Num Vec where
    (V a b c) + (V x y z) = (V (a+x) (b+y) (c+z))
    (V a b c) - (V x y z) = (V (a-x) (b-y) (c-z))
    fromInteger 0 = V 0.0 0.0 0.0 -- for sum function
instance Eq Vec where (V a b c) == (V x y z) = (a==x) && (b==y) && (c==z)
 
dot (V a b c) (V x y z) = a*x + b*y + c*z
scale (V a b c) n = V (n*a) (n*b) (n*c)
mag (V x y z) =  sqrt (x*x + y*y + z*z)
 
data Planet = Planet Vec Vec Double deriving Show --Position Velocity Mass
dist (Planet p1 _ _) (Planet p2 _ _) = mag $ p1 - p2
mass (Planet _ _ m) = m
vel  (Planet _ v _) = v
pos  (Planet p _ _) = p
 
main = do 
        [arg] <- getArgs
        let iter = read arg
        let n = 5::Int
        let bodies = offset_momentum n [sun, jupiter, saturn, neptune, uranus]
        let begin = energy n bodies
        putStrLn $ showFFloat (Just 9) begin ""
        let final = (iterate (advance n 0.01) bodies)
        let end = energy n (final !! iter)
        putStrLn $ showFFloat (Just 9) end ""
 
days_per_year = 365.24 
solar_mass = 4.0 * pi * pi
 
advance :: Int -> Double -> [Planet] -> [Planet]
advance n dt (p:ps) = take n ((Planet (pos p + delta_x)
                              (new_v) (mass p) ) : advance n dt (ps++[p]))
 where
   delta_v = sum (map (\q -> 
                  (pos p - pos q) `scale` ((mass q)*dt/(dist p q)^3)) ps)
   new_v   = (vel p) - delta_v
   delta_x = new_v `scale` dt 
 
energy:: Int -> [Planet] -> Double
energy n ps = kinetic - potential
  where
    kinetic   = 0.5 * (sum (map (\q->(mass q)*((vel q) `dot` (vel q))) ps))
    potential = sum $ zipWith grav_pot ps (tail (init (tails ps)))
    grav_pot x xs = sum $ map (\y -> (mass x)*(mass y)/(dist x y)) xs
 
offset_momentum n ((Planet p v m):ps) = (Planet p new_v m):ps
  where new_v = (sum (map (\n->(vel n) `scale` (mass n)) ps)) 
                `scale` ((-1.0)/solar_mass)
 
jupiter = (Planet 
 (V 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01))
 (V ( 1.66007664274403694e-03 * days_per_year)
    ( 7.69901118419740425e-03 * days_per_year)
    ((-6.90460016972063023e-05) * days_per_year)) 
 (9.54791938424326609e-04 * solar_mass))
 
saturn = (Planet
 (V 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01))
 (V (-2.76742510726862411e-03 * days_per_year) 
    (4.99852801234917238e-03 * days_per_year)
    (2.30417297573763929e-05 * days_per_year))
 (2.85885980666130812e-04 * solar_mass))
 
uranus = (Planet
 (V 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01))
 (V (2.96460137564761618e-03 * days_per_year)
    (2.37847173959480950e-03 * days_per_year)
    (-2.96589568540237556e-05 * days_per_year))
 (4.36624404335156298e-05 * solar_mass))
 
neptune = (Planet
 (V 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01)
 (V (2.68067772490389322e-03 * days_per_year)
    (1.62824170038242295e-03 * days_per_year)
    (-9.51592254519715870e-05 * days_per_year))
 (5.15138902046611451e-05 * solar_mass))
 
sun = (Planet (V 0.0 0.0 0.0) (V 0.0 0.0 0.0) solar_mass)