Shootout/Nbody
From HaskellWiki
(→Proposed entry) |
|||
| Line 95: | Line 95: | ||
''So Don: you can submit this when you are happy with the tweaking since I am blind to x86 performance here. Can you try passing architecture switches to -optc such as -mtune?'' -- ChrisKuklewicz | ''So Don: you can submit this when you are happy with the tweaking since I am blind to x86 performance here. Can you try passing architecture switches to -optc such as -mtune?'' -- ChrisKuklewicz | ||
| + | |||
| + | == Proposed entry 2 == | ||
| + | |||
| + | Improves on the one below, shorter and at tad faster. | ||
| + | |||
| + | <haskell> | ||
| + | {-# OPTIONS -fexcess-precision #-} | ||
| + | -- | ||
| + | -- The Computer Language Shootout | ||
| + | -- http://shootout.alioth.debian.org/ | ||
| + | -- | ||
| + | -- Contributed by Olof Kraigher, with help from Don Stewart. | ||
| + | -- | ||
| + | -- Compile with: | ||
| + | -- | ||
| + | -- -funbox-strict-fields -fglasgow-exts -fbang-patterns -O3 | ||
| + | -- -optc-O3 -optc-mfpmath=sse -optc-msse2 -optc-march=pentium4 | ||
| + | -- | ||
| + | |||
| + | import Foreign | ||
| + | import Foreign.Storable | ||
| + | import Foreign.Marshal.Alloc | ||
| + | import Data.IORef | ||
| + | import Control.Monad | ||
| + | import System | ||
| + | import Text.Printf | ||
| + | |||
| + | main = do | ||
| + | n <- getArgs >>= readIO.head | ||
| + | initialize | ||
| + | offset_momentum | ||
| + | energy 0 planets >>= printf "%.9f\n" | ||
| + | replicateM_ n (advance planets) | ||
| + | energy 0 planets >>= printf "%.9f\n" | ||
| + | |||
| + | offset_momentum = do | ||
| + | m <- foldr (.+.) (Vec 0 0 0) | ||
| + | `fmap` (mapM momentum | ||
| + | . take (nbodies - 1) | ||
| + | . iterate next $ next planets) | ||
| + | |||
| + | setVec (vel planets) $ (-1/solar_mass) *. m | ||
| + | where | ||
| + | momentum !p = liftM2 (*.) (mass p) (getVec (vel p)) | ||
| + | |||
| + | energy :: Double -> Ptr Double -> IO Double | ||
| + | energy !e !p | ||
| + | | p == end = return e | ||
| + | | otherwise = do | ||
| + | p1 <- getVec (pos p) | ||
| + | v1 <- getVec (vel p) | ||
| + | m1 <- mass p | ||
| + | e <- energy2 p1 m1 e p2 | ||
| + | energy (e + 0.5 * m1 * magnitude2 v1) p2 | ||
| + | where p2 = next p | ||
| + | |||
| + | energy2 !p1 !m1 !e !p | ||
| + | | p == end = return e | ||
| + | | otherwise = do | ||
| + | p2 <- getVec (pos p) | ||
| + | v2 <- getVec (vel p) | ||
| + | m2 <- mass p | ||
| + | let distance = sqrt . magnitude2 $ p1 .-. p2 | ||
| + | energy2 p1 m1 (e - m1 * m2 / distance) (next p) | ||
| + | |||
| + | advance :: Ptr Double -> IO () | ||
| + | advance !p1 = when (p1 /= end) $ do | ||
| + | pos1 <- getVec $ pos p1 | ||
| + | m1 <- mass p1 | ||
| + | let go !p2 | ||
| + | | p2 /= end = do | ||
| + | pos2 <- getVec (pos p2) | ||
| + | m2 <- mass p2 | ||
| + | let vel2 = vel p2 | ||
| + | difference = pos1 .-. pos2 | ||
| + | distance2 = magnitude2 difference | ||
| + | distance = sqrt distance2 | ||
| + | magnitude = delta_t / (distance2 * distance) | ||
| + | mass_magn = magnitude *. difference | ||
| + | vel1 -= m2 *. mass_magn | ||
| + | vel2 += m1 *. mass_magn | ||
| + | go (next p2) | ||
| + | |||
| + | | otherwise = do | ||
| + | v1 <- getVec vel1 | ||
| + | p1 += delta_t *. v1 | ||
| + | go p2 | ||
| + | advance p2 | ||
| + | where | ||
| + | vel1 = vel p1 | ||
| + | p2 = next p1 | ||
| + | |||
| + | ------------------------------------------------------------------------ | ||
| + | |||
| + | planets :: Ptr Double | ||
| + | planets = unsafePerformIO $ mallocBytes (7 * nbodies * 8) -- sizeOf double = 8 | ||
| + | |||
| + | nbodies :: Int | ||
| + | nbodies = 5 | ||
| + | |||
| + | solar_mass, delta_t, days_per_year :: Double | ||
| + | days_per_year = 365.24 | ||
| + | solar_mass = 4 * pi ** 2; | ||
| + | delta_t = 0.01 | ||
| + | |||
| + | initialize = mapM_ newPlanet planets | ||
| + | where | ||
| + | dp = days_per_year | ||
| + | planets = | ||
| + | [0, 0, 0, | ||
| + | 0, 0, 0, | ||
| + | 1 * solar_mass, | ||
| + | 4.84143144246472090e+00, (-1.16032004402742839e+00), (-1.03622044471123109e-01), | ||
| + | 1.66007664274403694e-03*dp, 7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp, | ||
| + | 9.54791938424326609e-04 * solar_mass, | ||
| + | |||
| + | 8.34336671824457987e+00, 4.12479856412430479e+00, (-4.03523417114321381e-01), | ||
| + | (-2.76742510726862411e-03)*dp, 4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp, | ||
| + | 2.85885980666130812e-04 * solar_mass, | ||
| + | |||
| + | 1.28943695621391310e+01, (-1.51111514016986312e+01), (-2.23307578892655734e-01), | ||
| + | 2.96460137564761618e-03*dp, 2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp, | ||
| + | 4.36624404335156298e-05 * solar_mass, | ||
| + | |||
| + | 1.53796971148509165e+01, (-2.59193146099879641e+01), 1.79258772950371181e-01, | ||
| + | 2.68067772490389322e-03*dp, 1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp, | ||
| + | 5.15138902046611451e-05 * solar_mass | ||
| + | ] | ||
| + | |||
| + | ------------------------------------------------------------------------ | ||
| + | -- Support for 3 dimensional mutable vectors | ||
| + | |||
| + | data Vector3 = Vec !Double !Double !Double | ||
| + | |||
| + | end :: Ptr Double | ||
| + | end = inc planets $ nbodies * 7 | ||
| + | |||
| + | next :: Ptr Double -> Ptr Double | ||
| + | next p = inc p 7 | ||
| + | |||
| + | cursor :: IORef (Ptr Double) | ||
| + | cursor = unsafePerformIO $ newIORef planets | ||
| + | |||
| + | inc :: Ptr Double -> Int -> Ptr Double | ||
| + | inc ptr n = plusPtr ptr (n * 8) | ||
| + | |||
| + | newPlanet :: Double -> IO () | ||
| + | newPlanet !d = do | ||
| + | ptr <- readIORef cursor | ||
| + | pokeElemOff ptr 0 d | ||
| + | writeIORef cursor (inc ptr 1) | ||
| + | |||
| + | pos :: Ptr Double -> Ptr Double | ||
| + | pos ptr = ptr | ||
| + | |||
| + | vel :: Ptr Double -> Ptr Double | ||
| + | vel ptr = inc ptr 3 | ||
| + | |||
| + | mass :: Ptr Double -> IO Double | ||
| + | mass ptr = peekElemOff ptr 6 | ||
| + | |||
| + | ------------------------------------------------------------------------ | ||
| + | |||
| + | (Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w) | ||
| + | |||
| + | (Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w) | ||
| + | |||
| + | k *. (Vec x y z) = Vec (k*x) (k*y) (k*z) -- allocates | ||
| + | |||
| + | magnitude2 (Vec x y z) = x*x + y*y + z*z | ||
| + | |||
| + | ------------------------------------------------------------------------ | ||
| + | |||
| + | getVec !p = liftM3 Vec (peek p) (f 1) (f 2) | ||
| + | where f = peekElemOff p | ||
| + | |||
| + | setVec p (Vec x y z)= do | ||
| + | poke p x | ||
| + | pokeElemOff p 1 y | ||
| + | pokeElemOff p 2 z | ||
| + | |||
| + | infix 4 += | ||
| + | infix 4 -= | ||
| + | |||
| + | v1 += (Vec u v w) = do | ||
| + | x <- peek v1; poke v1 (x+u) | ||
| + | y <- peekElemOff v1 1; pokeElemOff v1 1 (y+v) | ||
| + | z <- peekElemOff v1 2; pokeElemOff v1 2 (z+w) | ||
| + | |||
| + | v1 -= (Vec u v w) = do | ||
| + | x <- peek v1; poke v1 (x-u) | ||
| + | y <- peekElemOff v1 1; pokeElemOff v1 1 (y-v) | ||
| + | z <- peekElemOff v1 2; pokeElemOff v1 2 (z-w) | ||
| + | </haskell> | ||
| + | |||
== Proposed entry == | == Proposed entry == | ||
Revision as of 04:32, 11 February 2007
This is a ShootoutEntry for the N-body benchmark.
Each program should model the orbits of Jovian planets, using the same simple symplectic-integrator - see the Java program.
Correct output N = 1000 is
-0.169075164
-0.169087605
Contents |
1 Benchmarks
Finally, we get some decent performance. Timing on Linux/P4, N=1,000,000
| Author | Time | Flags |
|---|---|---|
| current | 8.561 | -O2 -optc-O3 -optc-ffast-math |
| current-fexcess-precision | 6.335 | -O2 -fexcess-precision -optc-O3 -optc-ffast-math |
| chris | 4.515 | -O2 |
| chris | 2.547 | -O2 -fexcess-precision |
| chris | 2.363 | -O2 -fexcess-precision -optc-O -optc-ffast-math |
| C | 1.174 | |
| chris+dons | 1.112 | -O2 -fexcess-precision -optc-O -optc-ffast-math |
| stuarray | 1.073 | -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields |
| stuarray2 | 0.912 | -O3 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields |
| C | 0.518 | -O2 |
Again with N=5,000,000 (as used in shootout):
| Author | Time | Flags |
|---|---|---|
| current | 42.685 | -O2 -optc-O3 -optc-ffast-math |
| chris | 11.753 | -O2 -fexcess-precision -optc-O -optc-ffast-math |
| C | 5.786 | |
| chris+dons | 5.498 | -O2 -fexcess-precision -optc-O -optc-ffast-math |
| stuarray | 5.290 | -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields |
| stuarray2 | 4.489 | -O2 -fexcess-precision -optc-O -optc-ffast-math -funbox-strict-fields |
| C | 2.555 | -O2 |
Hooray!
Need to add my smaller/faster tweak to Joel's entry. -- ChrisKuklewicz
On x86/Linux gcc produces weird numerical errors with the new IOUArray entries (chris and chris+dons), when using the -optc-Ox flags. It cannot be used above -optc-O1
| chris | ERROR | -O2 -optc-O3 -optc-ffast-math |
| chris | ERROR | -O2 -optc-O2 -fexcess-precision -optc-ffast-math |
1.1 Architecture differences
This is very CPU architecture dependent, which means a powerbook G4 is not seeing the same results as linux/x86.
Compiling both with -fglasgow-exts -fexcess-precision -O3 -optc-O3 -optc-ffast-math on a G4:
$ time ./chris 5000000; time ./dons+chris 5000000 -0.169075164 -0.169083134
real 0m27.794s user 0m21.176s sys 0m0.268s -0.169075164 -0.169083134
real 0m25.985s user 0m22.357s sys 0m0.236s
On linux/x86 {{{dons+chris}}} beats {{{chris}}} by a factor of two and on a G4 {{{dons+chris}}} is a few percent slower. Not too surprising, since the floating point hardware differs. The performanace for the this benchmark (AMD Sempron, Inten P4) is untunable on a G4.
So Don: you can submit this when you are happy with the tweaking since I am blind to x86 performance here. Can you try passing architecture switches to -optc such as -mtune? -- ChrisKuklewicz
2 Proposed entry 2
Improves on the one below, shorter and at tad faster.
{-# OPTIONS -fexcess-precision #-} -- -- The Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Olof Kraigher, with help from Don Stewart. -- -- Compile with: -- -- -funbox-strict-fields -fglasgow-exts -fbang-patterns -O3 -- -optc-O3 -optc-mfpmath=sse -optc-msse2 -optc-march=pentium4 -- import Foreign import Foreign.Storable import Foreign.Marshal.Alloc import Data.IORef import Control.Monad import System import Text.Printf main = do n <- getArgs >>= readIO.head initialize offset_momentum energy 0 planets >>= printf "%.9f\n" replicateM_ n (advance planets) energy 0 planets >>= printf "%.9f\n" offset_momentum = do m <- foldr (.+.) (Vec 0 0 0) `fmap` (mapM momentum . take (nbodies - 1) . iterate next $ next planets) setVec (vel planets) $ (-1/solar_mass) *. m where momentum !p = liftM2 (*.) (mass p) (getVec (vel p)) energy :: Double -> Ptr Double -> IO Double energy !e !p | p == end = return e | otherwise = do p1 <- getVec (pos p) v1 <- getVec (vel p) m1 <- mass p e <- energy2 p1 m1 e p2 energy (e + 0.5 * m1 * magnitude2 v1) p2 where p2 = next p energy2 !p1 !m1 !e !p | p == end = return e | otherwise = do p2 <- getVec (pos p) v2 <- getVec (vel p) m2 <- mass p let distance = sqrt . magnitude2 $ p1 .-. p2 energy2 p1 m1 (e - m1 * m2 / distance) (next p) advance :: Ptr Double -> IO () advance !p1 = when (p1 /= end) $ do pos1 <- getVec $ pos p1 m1 <- mass p1 let go !p2 | p2 /= end = do pos2 <- getVec (pos p2) m2 <- mass p2 let vel2 = vel p2 difference = pos1 .-. pos2 distance2 = magnitude2 difference distance = sqrt distance2 magnitude = delta_t / (distance2 * distance) mass_magn = magnitude *. difference vel1 -= m2 *. mass_magn vel2 += m1 *. mass_magn go (next p2) | otherwise = do v1 <- getVec vel1 p1 += delta_t *. v1 go p2 advance p2 where vel1 = vel p1 p2 = next p1 ------------------------------------------------------------------------ planets :: Ptr Double planets = unsafePerformIO $ mallocBytes (7 * nbodies * 8) -- sizeOf double = 8 nbodies :: Int nbodies = 5 solar_mass, delta_t, days_per_year :: Double days_per_year = 365.24 solar_mass = 4 * pi ** 2; delta_t = 0.01 initialize = mapM_ newPlanet planets where dp = days_per_year planets = [0, 0, 0, 0, 0, 0, 1 * solar_mass, 4.84143144246472090e+00, (-1.16032004402742839e+00), (-1.03622044471123109e-01), 1.66007664274403694e-03*dp, 7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp, 9.54791938424326609e-04 * solar_mass, 8.34336671824457987e+00, 4.12479856412430479e+00, (-4.03523417114321381e-01), (-2.76742510726862411e-03)*dp, 4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp, 2.85885980666130812e-04 * solar_mass, 1.28943695621391310e+01, (-1.51111514016986312e+01), (-2.23307578892655734e-01), 2.96460137564761618e-03*dp, 2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp, 4.36624404335156298e-05 * solar_mass, 1.53796971148509165e+01, (-2.59193146099879641e+01), 1.79258772950371181e-01, 2.68067772490389322e-03*dp, 1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp, 5.15138902046611451e-05 * solar_mass ] ------------------------------------------------------------------------ -- Support for 3 dimensional mutable vectors data Vector3 = Vec !Double !Double !Double end :: Ptr Double end = inc planets $ nbodies * 7 next :: Ptr Double -> Ptr Double next p = inc p 7 cursor :: IORef (Ptr Double) cursor = unsafePerformIO $ newIORef planets inc :: Ptr Double -> Int -> Ptr Double inc ptr n = plusPtr ptr (n * 8) newPlanet :: Double -> IO () newPlanet !d = do ptr <- readIORef cursor pokeElemOff ptr 0 d writeIORef cursor (inc ptr 1) pos :: Ptr Double -> Ptr Double pos ptr = ptr vel :: Ptr Double -> Ptr Double vel ptr = inc ptr 3 mass :: Ptr Double -> IO Double mass ptr = peekElemOff ptr 6 ------------------------------------------------------------------------ (Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w) (Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w) k *. (Vec x y z) = Vec (k*x) (k*y) (k*z) -- allocates magnitude2 (Vec x y z) = x*x + y*y + z*z ------------------------------------------------------------------------ getVec !p = liftM3 Vec (peek p) (f 1) (f 2) where f = peekElemOff p setVec p (Vec x y z)= do poke p x pokeElemOff p 1 y pokeElemOff p 2 z infix 4 += infix 4 -= v1 += (Vec u v w) = do x <- peek v1; poke v1 (x+u) y <- peekElemOff v1 1; pokeElemOff v1 1 (y+v) z <- peekElemOff v1 2; pokeElemOff v1 2 (z+w) v1 -= (Vec u v w) = do x <- peek v1; poke v1 (x-u) y <- peekElemOff v1 1; pokeElemOff v1 1 (y-v) z <- peekElemOff v1 2; pokeElemOff v1 2 (z-w)
3 Proposed entry
A fast and relatively nice entry that runs 1.5x the fastest c++ entry.
{-# OPTIONS_GHC -fbang-patterns -fexcess-precision#-} {- The Computer Language Shootout http://shootout.alioth.debian.org/ Contributed by Olof Kraigher, with help from Don Stewart. Compile with: -fglasgow-exts -fbang-patterns -fexcess-precision -O3 -optc-O2 -optc-mfpmath=sse -optc-msse2 -optc-march=pentium4 -} import Foreign.Ptr; import Foreign.Storable; import Foreign.Marshal.Alloc; import Foreign(unsafePerformIO); import Data.IORef; import Control.Monad; import System; import Text.Printf; data Vector3 = Vec !Double !Double !Double nr_of_planets :: Int nr_of_planets = 5 delta_t :: Double delta_t = 0.01 days_per_year :: Double days_per_year = 365.24 solar_mass :: Double solar_mass = 4 * pi ** 2; planets :: Ptr Double planets = unsafePerformIO $ mallocBytes (7 * nr_of_planets * 8) -- sizeOf double = 8 end :: Ptr Double end = inc planets $ nr_of_planets * 7 cursor :: IORef (Ptr Double) cursor = unsafePerformIO $ newIORef planets {-# INLINE inc #-} inc :: Ptr Double -> Int -> Ptr Double inc ptr n = plusPtr ptr $ n * 8 {-# INLINE new #-} new :: Double -> IO () new !d = do ptr <- readIORef cursor pokeElemOff ptr 0 d writeIORef cursor (inc ptr 1) {-# INLINE getVec #-} getVec :: Ptr Double -> IO Vector3 getVec ptr = let p = peekElemOff ptr in liftM3 Vec (p 0) (p 1) (p 2) {-# INLINE setVec #-} setVec :: Ptr Double -> Vector3 -> IO () setVec ptr (Vec x y z)= do pokeElemOff ptr 0 x pokeElemOff ptr 1 y pokeElemOff ptr 2 z {-# INLINE pos #-} pos :: Ptr Double -> Ptr Double pos ptr = ptr {-# INLINE vel #-} vel :: Ptr Double -> Ptr Double vel ptr = inc ptr 3 {-# INLINE mass #-} mass :: Ptr Double -> IO Double mass ptr = peekElemOff ptr 6 {-# INLINE (.+.) #-} (.+.) :: Vector3 -> Vector3 -> Vector3 (Vec x y z) .+. (Vec u v w) = Vec (x+u) (y+v) (z+w) {-# INLINE (.-.) #-} (.-.) :: Vector3 -> Vector3 -> Vector3 (Vec x y z) .-. (Vec u v w) = Vec (x-u) (y-v) (z-w) {-# INLINE (*.) #-} (*.) :: Double -> Vector3 -> Vector3 k *. (Vec x y z) = Vec (k*x) (k*y) (k*z) {-# INLINE magnitude2 #-} magnitude2 :: Vector3 -> Double magnitude2 (Vec x y z) = x*x + y*y + z*z infix 4 += infix 4 -= {-# INLINE (+=) #-} (+=) :: Ptr Double -> Vector3 -> IO () v1 += v2 = do v1' <- getVec v1 setVec v1 $ v1' .+. v2 {-# INLINE (-=) #-} (-=) :: Ptr Double -> Vector3 -> IO () v1 -= v2 = do v1' <- getVec v1 setVec v1 $ v1' .-. v2 initialize :: IO () initialize = let dp = days_per_year in mapM_ new [ 0, 0, 0, 0, 0, 0, 1 * solar_mass, 4.84143144246472090e+00, (-1.16032004402742839e+00), (-1.03622044471123109e-01), 1.66007664274403694e-03*dp, 7.69901118419740425e-03*dp, (-6.90460016972063023e-05)*dp, 9.54791938424326609e-04 * solar_mass, 8.34336671824457987e+00, 4.12479856412430479e+00, (-4.03523417114321381e-01), (-2.76742510726862411e-03)*dp, 4.99852801234917238e-03*dp, 2.30417297573763929e-05*dp, 2.85885980666130812e-04 * solar_mass, 1.28943695621391310e+01, (-1.51111514016986312e+01), (-2.23307578892655734e-01), 2.96460137564761618e-03*dp, 2.37847173959480950e-03*dp, (-2.96589568540237556e-05)*dp, 4.36624404335156298e-05 * solar_mass, 1.53796971148509165e+01, (-2.59193146099879641e+01), 1.79258772950371181e-01, 2.68067772490389322e-03*dp, 1.62824170038242295e-03*dp, (-9.51592254519715870e-05)*dp, 5.15138902046611451e-05 * solar_mass ] advance :: IO () advance = do let advance' !planet1 | planet1 /= end = do p1 <- getVec $ pos planet1 m1 <- mass planet1 let vel1 = vel planet1 let advance'' !planet2 | planet2 /= end = do p2 <- getVec $ pos planet2 m2 <- mass planet2 let vel2 = vel planet2 let difference = p1 .-. p2 let distance2 = magnitude2 difference let distance = sqrt distance2 let magnitude = delta_t / (distance2 * distance) let mass_magn = magnitude *. difference vel1 -= (m2 *. mass_magn) vel2 += (m1 *. mass_magn) advance'' (inc planet2 7) | otherwise = do v1 <- getVec vel1 planet1 += delta_t *. v1 let planet1' = (inc planet1 7) advance'' planet1' advance' planet1' | otherwise = return () advance' planets energy :: IO Double energy = do e <- newIORef 0 let energy' !planet1 | planet1 /= end = do p1 <- getVec $ pos planet1 v1 <- getVec $ vel planet1 m1 <- mass planet1 let energy'' !planet2 | planet2 /= end = do p2 <- getVec $ pos planet2 v2 <- getVec $ vel planet2 m2 <- mass planet2 let distance = sqrt $ magnitude2 $ p1 .-. p2 modifyIORef e (\x -> x - m1 * m2 / distance) energy'' (inc planet2 7) | otherwise = do modifyIORef e (+ 0.5 * m1 * (magnitude2 v1)) let planet1' = (inc planet1 7) energy'' planet1' energy' planet1' | otherwise = return () energy' planets readIORef e offset_momentum :: IO () offset_momentum = do let momentum' !planet = do v <- getVec $ vel planet m <- mass planet return $ m *. v momentum <- return.(foldl (.+.) (Vec 0 0 0)) =<< (mapM momentum' $ take (nr_of_planets - 1) $ iterate (\x -> inc x 7) (inc planets 7)) setVec (vel planets) $ (-1/solar_mass) *. momentum main = do n <- getArgs >>= readIO.head initialize offset_momentum energy >>= printf "%.9f\n" replicateM_ n advance energy >>= printf "%.9f\n"
4 Proposed entry
Complete rewrite, attention paid down to the asm level on the inner loops. Very much faster.
Note that in GHC -fexcess-precision is ignored on the command line!
{-# OPTIONS -fexcess-precision #-} -- -- The Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Don Stewart -- -- Compile with: -- -O -fglasgow-exts -fbang-patterns -optc-O3 -optc-march=pentium4 -- import System import System.IO.Unsafe import Text.Printf import Foreign.Marshal.Array import Foreign import Control.Monad import Bits import GHC.Exts ------------------------------------------------------------------------ main = do n <- getArgs >>= readIO . head offset_momentum printf "%.9f\n" =<< energy replicateM_ n advance printf "%.9f\n" =<< energy ------------------------------------------------------------------------ type Bodies = Ptr Double nbodies = 5 bodies :: Bodies bodies = unsafePerformIO . newArray . concat $ [planet 1 0 0 0 0 0 0 -- sun ,planet 9.54791938424326609e-04 -- jupiter 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05) ,planet 2.85885980666130812e-04 -- saturn 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05) ,planet 4.36624404335156298e-05 -- uranus 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05) ,planet 5.15138902046611451e-05 -- neptune 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)] where planet m x y z vx vy vz = [m*solar_mass,x, y, z, vx*days_per_year, vy*days_per_year, vz*days_per_year, 0] -- n.b widht of 8 doubles to make bitwise math nice -- field access (W# body) `at` (W# field) = I# (word2Int# (field `or#` (body `uncheckedShiftL#` 3#))) -- sizeof Double put !body !field = pokeElemOff bodies (body `at` field) get !body !field = peekElemOff bodies (body `at` field) -- -- field offsets in flattened array -- mass = 0 x = 1 y = 2 z = 3 vx = 4 vy = 5 vz = 6 solar_mass = 4 * pi * pi days_per_year = 365.24 ------------------------------------------------------------------------ -- Offset momentum offset_momentum :: IO () offset_momentum = do (px,py,pz) <- go 0 0 0 0 put 0 vx (- px / solar_mass) put 0 vy (- py / solar_mass) put 0 vz (- pz / solar_mass) where go !i !px !py !pz | i >= nbodies = return (px,py,pz) | otherwise = do imass <- look mass ivx <- look vx ivy <- look vy ivz <- look vz go (i+1) (px + ivx * imass) (py + ivy * imass) (pz + ivz * imass) where look = get i ------------------------------------------------------------------------ -- Energy energy = goI 0 0 goI :: Word -> Double -> IO Double goI !i !e | i >= nbodies = return e | otherwise = do im <- look mass ix <- look x -- cache the body offset calculation? iy <- look y iz <- look z ivx <- look vx ivy <- look vy ivz <- look vz e' <- goJ ix iy iz im (i+1) (e + 0.5 * im * (ivx * ivx + ivy * ivy + ivz * ivz)) goI (i+1) e' where look = get i goJ !ix !iy !iz !im !j !e | j >= nbodies = return e | otherwise = do b2m <- look mass b2x <- look x b2y <- look y b2z <- look z let dx = ix - b2x dy = iy - b2y dz = iz - b2z distance = sqrt $! dx * dx + dy * dy + dz * dz goJ ix iy iz im (j+1) (e - ((im * b2m) / distance)) where look = get j ------------------------------------------------------------------------ -- Advance advance :: IO () advance = advanceI 0 >> update advanceI i@(W# ii) = when (i < nbodies) $ do bm <- lookI mass bx <- lookI x -- cache the body offset calculation! by <- lookI y bz <- lookI z bvx <- lookI vx -- float these out to an accumultor bvy <- lookI vy bvz <- lookI vz advanceJ off bm bx by bz bvx bvy bvz (i+1) advanceI (i+1) where off = W# (ii `uncheckedShiftL#` 3#) lookI x = peekElemOff bodies (fromIntegral $ x .|. off) setI = put i advanceJ !i_off !bm !bx !by !bz !bvx !bvy !bvz !j@(W# jj) | j >= nbodies = do let setI x = pokeElemOff bodies (fromIntegral $ x .|. i_off) setI vx bvx setI vy bvy setI vz bvz | otherwise = do b2mass <- lookJ mass b2x <- lookJ x b2y <- lookJ y b2z <- lookJ z b2vx <- lookJ vx b2vy <- lookJ vy b2vz <- lookJ vz let dx = bx - b2x dy = by - b2y dz = bz - b2z distance = sqrt (dx * dx + dy * dy + dz * dz) mag = 0.01 / (distance * distance * distance) massmag = bm * mag setJ vx (b2vx + dx * massmag) setJ vy (b2vy + dy * massmag) setJ vz (b2vz + dz * massmag) let mass2mag = - b2mass * mag advanceJ i_off bm bx by bz (bvx + dx * mass2mag) (bvy + dy * mass2mag) (bvz + dz * mass2mag) (j+1) where off = W# (jj `uncheckedShiftL#` 3#) lookJ x = peekElemOff bodies (fromIntegral $ x .|. off) setJ x = pokeElemOff bodies (fromIntegral $ x .|. off) update = forM_ [0..nbodies-1] $ \i -> do let look = get i set = put i bx <- look x by <- look y bz <- look z bvx <- look vx bvy <- look vy bvz <- look vz set x (bx + 0.01 * bvx) set y (by + 0.01 * bvy) set z (bz + 0.01 * bvz)
5 old entry
Modifies the ghc flags. Submitted.
{-# OPTIONS -O -fglasgow-exts -fbang-patterns -funbox-strict-fields -fexcess-precision -optc-O -optc-march=pentium4 #-} -- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Christoph Bauer -- Written in Haskell by Chris Kuklewicz, further tweaks by Don Stewart -- -- -O2 -optc-O -fglasgow-exts -fexcess-precision -optc-ffast-math -- -- -optc-O3 cannot be used, as at least one version of gcc miscompiles this program -- import System import System.IO.Unsafe import Monad import Bits import List import Data.Array.IO import Data.Array.Base (unsafeRead,unsafeWrite) import Text.Printf import GHC.Base import GHC.Int default (Int) main = do n <- getArgs >>= readIO . head offsetMomentum energy >>= printf "%.9f\n" advance n energy >>= printf "%.9f\n" -- Offsets for each field x = 0; y = 1; z = 2; vx= 3; vy= 4; vz= 5; m = 6 type Bodies = IOUArray Int Double b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData) {-# NOINLINE b #-} set = unsafeWrite b {-# INLINE set #-} resetB = mapM_ (uncurry set) (zip [0..] bodiesData) -- sun jupiter saturn uranus neptune -- sun starts at center at rest bodiesData = concat [ mkB 1 0 0 0 0 0 0 ,mkB 9.54791938424326609e-04 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05) ,mkB 2.85885980666130812e-04 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05) ,mkB 4.36624404335156298e-05 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05) ,mkB 5.15138902046611451e-05 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)] mkB m x y z vx vy vz = [x, y, z, vx*days_per_year,vy*days_per_year, vz*days_per_year, m*solar_mass, 0] solar_mass = 4 * pi * pi days_per_year = 365.24 nbodies = 4 -- that is 0 to 4 mass :: Int -> IO Double mass i = unsafeRead b (massOffset .|. (shiftl i 3)) where massOffset = 6 -- Give the sun a small velocity so the total momentum of all bodies totals to zero offsetMomentum :: IO () offsetMomentum = do sm <- mass 0 let act i = mass i >>= \m -> addScaled 3 (-m/sm) (3 .|. (shiftl i 3)) mapM_ act [1..nbodies] -- Total all kineticE and potentialE energy = loop 0 0 where loop !i !e | i > nbodies = return e | otherwise = do ke <- kineticE i (loop' (i+1) i $! (e+ke)) >>= loop (i+1) loop' !j !i !e | j > nbodies = return e | otherwise = do pe <- potentialE i j loop' (j+1) i $! (e + pe) kineticE !i = do m <- mass i vx <- unsafeRead b (f vx) vy <- unsafeRead b (f vy) vz <- unsafeRead b (f vz) return $! 0.5 * m * (vx*vx + vy*vy + vz*vz) where f = (.|. (shiftl i 3)) potentialE !i !j = do m1 <- mass i m2 <- mass j dx <- liftM2 (-) (unsafeRead b (f x)) (unsafeRead b (g x)) dy <- liftM2 (-) (unsafeRead b (f y)) (unsafeRead b (g y)) dz <- liftM2 (-) (unsafeRead b (f z)) (unsafeRead b (g z)) return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz)) where f = (.|.) (shiftl j 3) g = (.|.) (shiftl i 3) addScaled !i !a !j = do set i1 =<< liftM2 scale (unsafeRead b i1) (unsafeRead b j1) set i2 =<< liftM2 scale (unsafeRead b i2) (unsafeRead b j2) set i3 =<< liftM2 scale (unsafeRead b i3) (unsafeRead b j3) where scale old new = old + a * new i1 = i; i2 = succ i1; i3 = succ i2; j1 = j; j2 = succ j1; j3 = succ j2; addScaled3 !i !a !jx !jy !jz = do set i1 =<< liftM (scale jx) (unsafeRead b i1) set i2 =<< liftM (scale jy) (unsafeRead b i2) set i3 =<< liftM (scale jz) (unsafeRead b i3) where scale new old = a * new + old i1 = i; i2 = succ i1; i3 = succ i2; -- This is the main code. Essentially all the time is spent here advance :: Int -> IO () advance n = when (n > 0) $ updateVel 0 >> advance (pred n) where updateVel i = when (i <= nbodies) $ do im <- unsafeRead b (f m) ix <- unsafeRead b (f x) iy <- unsafeRead b (f y) iz <- unsafeRead b (f z) ivx <- unsafeRead b (f vx) ivy <- unsafeRead b (f vy) ivz <- unsafeRead b (f vz) let updateVel' :: Double -> Double -> Double -> Int -> IO () updateVel' !ivx !ivy !ivz !j = if j > nbodies then do unsafeWrite b (f vx) ivx unsafeWrite b (f vy) ivy unsafeWrite b (f vz) ivz else do let g = (.|. shiftl j 3) jm <- unsafeRead b (g m) dx <- liftM (ix-) (unsafeRead b (g x)) dy <- liftM (iy-) (unsafeRead b (g y)) dz <- liftM (iz-) (unsafeRead b (g z)) let distance = sqrt (dx*dx+dy*dy+dz*dz) mag = 0.01 / (distance * distance * distance) addScaled3 (3 .|. (shiftl j 3)) ( im*mag) dx dy dz let a = -jm*mag ivx' = ivx+a*dx ivy' = ivy+a*dy ivz' = ivz+a*dz updateVel' ivx' ivy' ivz' $! (j+1) updateVel' ivx ivy ivz $! (i+1) addScaled (shiftl i 3) 0.01 (3 .|. (shiftl i 3)) updateVel (i+1) where f = (.|. shift i 3) shiftl = shiftL -- (I# i) (I# j) = I# (word2Int# (uncheckedShiftL# (int2Word# i) j))
6 Old entry
On a powerbook G4 (GHC 6.4.1, gcc 4.0.1) this is 1.7x time faster than dons+chris. 12.5 seconds versus 21.2 seconds for N=5,000,000 --ChrisKuklewicz Unboxing the strict fields gives a 3x speedup.
-- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Written by Joel Koerwer. -- Uses 7 STUArrays for the state. -- Edited for length by ChrisKuklewicz -- -- -O3 -optc-O3 -fexcess-precision -funbox-strict-fields -optc-ffast-math -- -- -funbox-strict-fields is critical -- -- -optc-ffast-math doesn't speed things up for me. -- Well, neither does -optc-O3, but that may just be here. -- import Control.Monad (liftM2,liftM3,liftM4) import Control.Monad.ST (ST, runST) import Data.Array.ST (STUArray, newListArray) import Data.Array.Base (unsafeRead, unsafeWrite) import System (getArgs) import Text.Printf (printf) (!) = unsafeRead :: STUArray s Int Double -> Int -> ST s Double writeArray = unsafeWrite :: STUArray s Int Double -> Int -> Double -> ST s () size :: Int = 5 dt :: Double = 0.01 data PhaseSpace s = PS {ms,rxs,rys,rzs,vxs,vys,vzs :: !(STUArray s Int Double)} main :: IO () main = do n <- getArgs >>= readIO . head printf "%.9f\n" $ runST (do state <- initialState offsetMomentum state energy state) printf "%.9f\n" $ runST (do state <- initialState offsetMomentum state energy state advance n state energy state) advance :: Int -> PhaseSpace s -> ST s () advance 0 _ = return () advance n sys = do kick sys drift sys advance (n-1) sys readAll (PS m x y z vx vy vz) i = do (x,y,z) <- liftM3 (,,) (x!i) (y!i) (z!i) (vx,vy,vz) <- liftM3 (,,) (vx!i) (vy!i) (vz!i) return (x,y,z,vx,vy,vz) {-# INLINE readAll #-} kick ps@(PS m rx ry rz vx vy vz) = outer 0 where outer i | i >= size = return () | otherwise = inner (i+1) >> outer (i+1) where inner j | j >= size = return () | otherwise = do (mi,mj) <- liftM2 (,) (m!i) (m!j) (rxi,ryi,rzi,vxi,vyi,vzi) <- readAll ps i (rxj,ryj,rzj,vxj,vyj,vzj) <- readAll ps j let (dx,dy,dz) = (rxi-rxj, ryi-ryj, rzi-rzj) dist2 = dx*dx + dy*dy + dz*dz mag = dt / (dist2 * sqrt dist2) writeArray vx i (vxi - dx*mj*mag) writeArray vy i (vyi - dy*mj*mag) writeArray vz i (vzi - dz*mj*mag) writeArray vx j (vxj + dx*mi*mag) writeArray vy j (vyj + dy*mi*mag) writeArray vz j (vzj + dz*mi*mag) inner (j+1) drift ps@(PS _ rxs rys rzs _ _ _) = loop 0 where loop i | i >= size = return () | otherwise = do (x,y,z,vx,vy,vz) <- readAll ps i writeArray rxs i (x + dt*vx) writeArray rys i (y + dt*vy) writeArray rzs i (z + dt*vz) loop (i+1) energy sys = liftM2 (+) (kineticEnergy sys) (potentialEnergy sys) kineticEnergy (PS ms _ _ _ vxs vys vzs) = loop 0 0 where loop i accum | i >= size = return (0.5 * accum) | otherwise = do (m,vx,vy,vz) <- liftM4 (,,,) (ms!i) (vxs!i) (vys!i) (vzs!i) let v2 = vx*vx + vy*vy + vz*vz loop (i+1) $! (accum + m*v2) potentialEnergy (PS ms rxs rys rzs _ _ _) = outer 0 0 where outer i a | i >= size = return a | otherwise = inner i (i+1) a >>= outer (i+1) inner i j a | j >= size = return a | otherwise = do (mi,xi,yi,zi) <- liftM4 (,,,) (ms!i) (rxs!i) (rys!i) (rzs!i) (mj,xj,yj,zj) <- liftM4 (,,,) (ms!j) (rxs!j) (rys!j) (rzs!j) let (dx,dy,dz) = (xi-xj, yi-yj, zi-zj) dist = sqrt (dx*dx + dy*dy + dz*dz) inner i (j+1) $! (a - mi*mj/dist) offsetMomentum (PS ms rxs rys rzs vxs vys vzs) = do (px,py,pz) <- calcMomentum 0 (0,0,0) (mSun,vSunx,vSuny,vSunz) <- liftM4 (,,,) (ms!0) (vxs!0) (vys!0) (vzs!0) writeArray vxs 0 (vSunx - px/mSun) writeArray vys 0 (vSuny - py/mSun) writeArray vzs 0 (vSunz - pz/mSun) where calcMomentum i (px,py,pz) | i >= size = return (px,py,pz) | otherwise = do (m,vx,vy,vz) <- liftM4 (,,,) (ms!i) (vxs!i) (vys!i) (vzs!i) calcMomentum (i+1) $! (px+vx*m,py+vy*m,pz+vz*m) initialState = do m <- mkSTUArray masses x <- mkSTUArray positionXs y <- mkSTUArray positionYs z <- mkSTUArray positionZs vx <- mkSTUArray velocityXs vy <- mkSTUArray velocityYs vz <- mkSTUArray velocityZs return (PS m x y z vx vy vz) mkSTUArray = newListArray (0,size-1) :: [Double] -> ST s (STUArray s Int Double) masses = map (4*pi*pi*) [1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05] positionXs = [0, 4.84143144246472090e+00, 8.34336671824457987e+00, 1.28943695621391310e+01, 1.53796971148509165e+01] positionYs = [0, (-1.16032004402742839e+00), 4.12479856412430479e+00, (-1.51111514016986312e+01), (-2.59193146099879641e+01)] positionZs = [0, (-1.03622044471123109e-01), (-4.03523417114321381e-01), (-2.23307578892655734e-01), 1.79258772950371181e-01] velocityXs = map (365.24*) [0, 1.66007664274403694e-03, (-2.76742510726862411e-03), 2.96460137564761618e-03, 2.68067772490389322e-03] velocityYs = map (365.24*) [0, 7.69901118419740425e-03, 4.99852801234917238e-03, 2.37847173959480950e-03, 1.62824170038242295e-03] velocityZs = map (365.24*) [0, (-6.90460016972063023e-05), 2.30417297573763929e-05, (-2.96589568540237556e-05), (-9.51592254519715870e-05)]
7 An STUArray Version (Joel Koerwer)
This one is slightly faster than chris+dons on my machine. The "proposed entry" has ben submitted, so I pushed it and its associated benchmarks down.
I get a speed up from the STUArray versus the dons+chris version on a Powerbook G4. Don+chris is 4.38 s user time, Joel's is 3.74 s user time (17% faster). This makes sense to me, since it does not need to shiftL and .|. the indices. I was cleaning it up without making it slower and I accidentally made it faster (I think by hoisting dt and size). -- ChrisKuklewicz
-- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Written by Joel Koerwer. -- Uses 7 STUArrays for the state. -- -- Performance is very similar to the IOUArray version of -- Chris Kuklewicz and Don Stewart. This one is slightly faster -- on my machine at n=5,000,000 (15 s vs 17 s). -- -- -O3 -optc-O3 -fexcess-precision -funbox-strict-fields -- -- -optc-ffast-math doesn't speed things up for me. -- Well, neither does -optc-O3, but that may just be here. -- import Control.Monad.ST (ST, runST) import Data.Array.ST (STUArray, newListArray) import Data.Array.Base (unsafeRead, unsafeWrite) import System (getArgs) import Text.Printf (printf) readArray :: STUArray s Int Double -> Int -> ST s Double readArray = unsafeRead writeArray :: STUArray s Int Double -> Int -> Double -> ST s () writeArray = unsafeWrite data PhaseSpace s = PS {size :: !Int, ms,rxs,rys,rzs,vxs,vys,vzs :: !(STUArray s Int Double)} advance :: Int -> Double -> PhaseSpace s -> ST s () advance 0 _ _ = return () advance n dt sys = do kick dt sys drift dt sys advance (n-1) dt sys kick :: Double -> PhaseSpace s -> ST s () kick dt (PS size m rx ry rz vx vy vz) = outer 0 where outer i | i >= size = return () | otherwise = inner (i+1) >> outer (i+1) where inner j | j >= size = return () | otherwise = do mi <- readArray m i mj <- readArray m j rxi <- readArray rx i rxj <- readArray rx j ryi <- readArray ry i ryj <- readArray ry j rzi <- readArray rz i rzj <- readArray rz j vxi <- readArray vx i vxj <- readArray vx j vyi <- readArray vy i vyj <- readArray vy j vzi <- readArray vz i vzj <- readArray vz j let dx = rxi-rxj dy = ryi-ryj dz = rzi-rzj dist2 = dx*dx + dy*dy + dz*dz dist = sqrt dist2 mag = dt / (dist*dist2) writeArray vx i (vxi - dx*mj*mag) writeArray vy i (vyi - dy*mj*mag) writeArray vz i (vzi - dz*mj*mag) writeArray vx j (vxj + dx*mi*mag) writeArray vy j (vyj + dy*mi*mag) writeArray vz j (vzj + dz*mi*mag) inner (j+1) drift :: Double -> PhaseSpace s -> ST s () drift dt (PS size _ rxs rys rzs vxs vys vzs) = loop 0 where loop i | i >= size = return () | otherwise = do x <- readArray rxs i y <- readArray rys i z <- readArray rzs i vx <- readArray vxs i vy <- readArray vys i vz <- readArray vzs i writeArray rxs i (x + dt*vx) writeArray rys i (y + dt*vy) writeArray rzs i (z + dt*vz) loop (i+1) energy :: PhaseSpace s -> ST s Double energy sys = do t <- kineticEnergy sys u <- potentialEnergy sys return (t+u) kineticEnergy :: PhaseSpace s -> ST s Double kineticEnergy (PS size ms _ _ _ vxs vys vzs) = loop 0 0 where loop i accum | i >= size = return (0.5 * accum) | otherwise = do m <- readArray ms i vx <- readArray vxs i vy <- readArray vys i vz <- readArray vzs i let v2 = vx*vx + vy*vy + vz*vz loop (i+1) (accum + m*v2) potentialEnergy :: PhaseSpace s -> ST s Double potentialEnergy (PS size ms rxs rys rzs _ _ _) = outer 0 0 where outer i a | i >= size = return a | otherwise = inner i (i+1) a >>= outer (i+1) inner i j a | j >= size = return a | otherwise = do mi <- readArray ms i mj <- readArray ms j xi <- readArray rxs i xj <- readArray rxs j yi <- readArray rys i yj <- readArray rys j zi <- readArray rzs i zj <- readArray rzs j let dx = xi-xj dy = yi-yj dz = zi-zj dist = sqrt (dx*dx + dy*dy + dz*dz) inner i (j+1) (a - mi*mj/dist) offsetMomentum :: PhaseSpace s -> ST s () offsetMomentum (PS size ms rxs rys rzs vxs vys vzs) = do (px,py,pz) <- calcMomentum 0 (0,0,0) mSun <- readArray ms 0 vSunx <- readArray vxs 0 vSuny <- readArray vys 0 vSunz <- readArray vzs 0 writeArray vxs 0 (vSunx - px/mSun) writeArray vys 0 (vSuny - py/mSun) writeArray vzs 0 (vSunz - pz/mSun) where calcMomentum i (px,py,pz) | i >= size = return (px,py,pz) | otherwise = do m <- readArray ms i vx <- readArray vxs i vy <- readArray vys i vz <- readArray vzs i let px' = px + vx*m py' = py + vy*m pz' = pz + vz*m calcMomentum (i+1) (px',py',pz') main = do n <- getArgs >>= readIO . head printf "%.9f\n" $ runST (do state <- initialState offsetMomentum state energy state) printf "%.9f\n" $ runST (do state <- initialState offsetMomentum state advance n 0.01 state energy state) initialState :: ST s (PhaseSpace s) initialState = do m <- mkSTUArray masses x <- mkSTUArray positionXs y <- mkSTUArray positionYs z <- mkSTUArray positionZs vx <- mkSTUArray velocityXs vy <- mkSTUArray velocityYs vz <- mkSTUArray velocityZs return (PS 5 m x y z vx vy vz) mkSTUArray :: [Double] -> ST s (STUArray s Int Double) mkSTUArray = newListArray (0,4) masses = map (4*pi*pi*) [1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05] positionXs = [0, 4.84143144246472090e+00, 8.34336671824457987e+00, 1.28943695621391310e+01, 1.53796971148509165e+01] positionYs = [0, (-1.16032004402742839e+00), 4.12479856412430479e+00, (-1.51111514016986312e+01), (-2.59193146099879641e+01)] positionZs = [0, (-1.03622044471123109e-01), (-4.03523417114321381e-01), (-2.23307578892655734e-01), 1.79258772950371181e-01] velocityXs = map (365.24*) [0, 1.66007664274403694e-03, (-2.76742510726862411e-03), 2.96460137564761618e-03, 2.68067772490389322e-03] velocityYs = map (365.24*) [0, 7.69901118419740425e-03, 4.99852801234917238e-03, 2.37847173959480950e-03, 1.62824170038242295e-03] velocityZs = map (365.24*) [0, (-6.90460016972063023e-05), 2.30417297573763929e-05, (-2.96589568540237556e-05), (-9.51592254519715870e-05)]
8 Proposed Entry (dons+chris)
A carefully tuned version of Chris' breakthrough entry. Runs faster than unoptimised gcc C. Be sure not to use -optc-O3 on linux, or badness may ensue. The OPTIONS pragma passes -O to gcc last, so it will override any other -O args to gcc passed on the cmd line.
{-# OPTIONS -optc-O #-} -- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Christoph Bauer -- Written in Haskell by Chris Kuklewicz, further tweaks by Don Stewart -- -- -O2 -optc-O -fglasgow-exts -fexcess-precision -optc-ffast-math -- -- -optc-O3 cannot be used, as at least one version of gcc miscompiles this program -- import System import System.IO.Unsafe import Monad import Data.Bits import Data.List import Data.Array.IO import Data.Array.Base(unsafeRead,unsafeWrite) import Text.Printf default (Int) main = do n <- getArgs >>= readIO . head offsetMomentum energy >>= printf "%.9f\n" advance n energy >>= printf "%.9f\n" -- Offsets for each field x = 0; y = 1; z = 2; vx= 3; vy= 4; vz= 5; m = 6 type Bodies = IOUArray Int Double b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData) {-# NOINLINE b #-} set = unsafeWrite b {-# INLINE set #-} resetB = mapM_ (uncurry set) (zip [0..] bodiesData) -- sun jupiter saturn uranus neptune -- sun starts at center at rest bodiesData = concat [ mkB 1 0 0 0 0 0 0 ,mkB 9.54791938424326609e-04 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05) ,mkB 2.85885980666130812e-04 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05) ,mkB 4.36624404335156298e-05 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05) ,mkB 5.15138902046611451e-05 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)] mkB m x y z vx vy vz = [x, y, z, vx*days_per_year,vy*days_per_year, vz*days_per_year, m*solar_mass, 0] solar_mass = 4 * pi * pi days_per_year = 365.24 nbodies = 4 -- that is 0 to 4 mass i = unsafeRead b (massOffset .|. (shiftL i 3)) where massOffset = 6 -- Give the sun a small velocity so the total momentum of all bodies totals to zero offsetMomentum :: IO () offsetMomentum = do sm <- mass 0 let act i = mass i >>= \m -> addScaled 3 (-m/sm) (3 .|. (shiftL i 3)) mapM_ act [1..nbodies] -- Total all kineticE and potentialE energy = loop 0 0 where loop i e | i > nbodies = return e | otherwise = do ke <- kineticE i (loop' (i+1) i $! (e+ke)) >>= loop (i+1) loop' j i e | j > nbodies = return e | otherwise = do pe <- potentialE i j loop' (j+1) i $! (e + pe) kineticE i = let i' = (.|. (shiftL i 3)) in do m <- mass i vx <- unsafeRead b (i' vx) vy <- unsafeRead b (i' vy) vz <- unsafeRead b (i' vz) return $! 0.5 * m * (vx*vx + vy*vy + vz*vz) potentialE i j = do m1 <- mass i m2 <- mass j let (i', j') = ((.|. (shiftL i 3)), (.|. (shiftL j 3))) dx <- liftM2 (-) (unsafeRead b (i' x)) (unsafeRead b (j' x)) dy <- liftM2 (-) (unsafeRead b (i' y)) (unsafeRead b (j' y)) dz <- liftM2 (-) (unsafeRead b (i' z)) (unsafeRead b (j' z)) return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz)) addScaled i a j | i `seq` a `seq` j `seq` False = undefined -- stricitfy addScaled i a j = do set i1 =<< liftM2 scale (unsafeRead b i1) (unsafeRead b j1) set i2 =<< liftM2 scale (unsafeRead b i2) (unsafeRead b j2) set i3 =<< liftM2 scale (unsafeRead b i3) (unsafeRead b j3) where scale old new = old + a * new i1 = i; i2 = succ i1; i3 = succ i2; j1 = j; j2 = succ j1; j3 = succ j2; addScaled3 i a jx jy jz | i `seq` a `seq` jx `seq` jy `seq` jz `seq` False = undefined addScaled3 i a jx jy jz = do set i1 =<< liftM (scale jx) (unsafeRead b i1) set i2 =<< liftM (scale jy) (unsafeRead b i2) set i3 =<< liftM (scale jz) (unsafeRead b i3) where scale new old = a * new + old i1 = i; i2 = succ i1; i3 = succ i2; -- This is the main code. Essentially all the time is spent here advance n = when (n > 0) $ updateVel 0 >> advance (pred n) where updateVel i = when (i <= nbodies) $ do let i' = (.|. shift i 3) im <- unsafeRead b (i' m) ix <- unsafeRead b (i' x) iy <- unsafeRead b (i' y) iz <- unsafeRead b (i' z) ivx <- unsafeRead b (i' vx) ivy <- unsafeRead b (i' vy) ivz <- unsafeRead b (i' vz) let updateVel' ivx ivy ivz j = ivx `seq` ivy `seq` ivz `seq` if j > nbodies then do unsafeWrite b (i' vx) ivx unsafeWrite b (i' vy) ivy unsafeWrite b (i' vz) ivz else do let j' = (.|. shiftL j 3) jm <- unsafeRead b (j' m) dx <- liftM (ix-) (unsafeRead b (j' x)) dy <- liftM (iy-) (unsafeRead b (j' y)) dz <- liftM (iz-) (unsafeRead b (j' z)) let distance = sqrt (dx*dx+dy*dy+dz*dz) mag = 0.01 / (distance * distance * distance) addScaled3 (3 .|. (shiftL j 3)) ( im*mag) dx dy dz let a = -jm*mag ivx' = ivx+a*dx ivy' = ivy+a*dy ivz' = ivz+a*dz updateVel' ivx' ivy' ivz' $! (j+1) updateVel' ivx ivy ivz $! (i+1) addScaled (shiftL i 3) 0.01 (3 .|. (shiftL i 3)) updateVel (i+1)
9 Improved entry (chris)
I made it go faster! By a factor of 2.5, which is not closing much of the gap. But any speed improvement is a breakthrough at this point. Hopefully someone else can see why it is faster and speed it up further. I am just tweaking things at random now.
All the n-body data (7 doubles: mass x y z vx vy vz) is put into a single IOUArray Int Double. Each body is offset by 8 indices, so there is a padding of 1 double to get an alignment of 8. This let me calculate the offset of each double with {{{shiftL _ 3}}} and . The array itself is a global variable.
-- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Christoph Bauer -- Rewritten in Haskell from C by Don Stewart -- Rewritten again by Chris Kuklewicz -- -- -O2 -optc-O3 -fglasgow-exts -fexcess-precision -optc-ffast-math -- -- This code actually runs faster then the previous entry! -- import Debug.Trace import System import System.IO.Unsafe import Monad import Data.Bits import Data.List import Data.Array.MArray import Data.Array.IO import Data.Array.Base(unsafeRead,unsafeWrite) import Text.Printf import Data.IORef default(Int) main = do args <- getArgs let n = if null args then 1000000 else read ( head args ) offsetMomentum energy >>= printf "%.9f\n" advance n energy >>= printf "%.9f\n" -- Offsets for each field x = 0 y = 1 z = 2 vx= 3 vy= 4 vz= 5 m = 6 type Bodies = IOUArray Int Double {-# NOINLINE b #-} b :: Bodies = unsafePerformIO $ newListArray (0,pred (length bodiesData)) (bodiesData) get = unsafeRead b set = unsafeWrite b resetB = mapM_ (uncurry set) (zip [0..] bodiesData) -- sun jupiter saturn uranus neptune -- sun starts at center at rest bodiesData = concat [ mkB 1 0 0 0 0 0 0 ,mkB 9.54791938424326609e-04 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05) ,mkB 2.85885980666130812e-04 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05) ,mkB 4.36624404335156298e-05 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05) ,mkB 5.15138902046611451e-05 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05)] mkB m x y z vx vy vz = [x, y, z, vx*days_per_year, vy*days_per_year, vz*days_per_year, m*solar_mass, 0] solar_mass = 4 * pi * pi days_per_year = 365.24 nbodies = 4 -- that is 0 to 4 sh i = shiftL i 3 -- multiply by 8 pos i = sh i vel i = velOffset .|. sh i where velOffset = 3 mass i = get (massOffset .|. sh i) where massOffset = 6 -- Give the sun a small velocity so the total momentum of all bodies totals to zero offsetMomentum :: IO () offsetMomentum = do sm <- mass 0 let sv = vel 0 act i = mass i >>= \m -> addScaled sv (-m/sm) (vel i) mapM_ act [1..nbodies] -- Total all kineticE and potentialE energy :: IO Double energy = loop 0 0 where loop i e | i > nbodies = return e | otherwise = do ke <- kineticE i (loop' (i+1) i $! (e+ke)) >>= loop (i+1) loop' j i e | j > nbodies = return e | otherwise = do pe <- potentialE i j loop' (j+1) i $! (e + pe) kineticE i = let i' = (.|. sh i) in do m <- mass i vx <- get (i' vx) vy <- get (i' vy) vz <- get (i' vz) return $! 0.5 * m * (vx*vx + vy*vy + vz*vz) potentialE i j = do m1 <- mass i m2 <- mass j let i' = (.|. (sh i)) j' = (.|. (sh j)) dx <- liftM2 (-) (get (i' x)) (get (j' x)) dy <- liftM2 (-) (get (i' y)) (get (j' y)) dz <- liftM2 (-) (get (i' z)) (get (j' z)) return $! ((-1)*m1*m2/sqrt (dx*dx + dy*dy + dz*dz)) dt = 0.01 addScaled i a j = let scale old new = old + a * new i1=i; i2=succ i1; i3=succ i2; j1=j; j2=succ j1; j3=succ j2; in do set i1 =<< liftM2 scale (get i1) (get j1) set i2 =<< liftM2 scale (get i2) (get j2) set i3 =<< liftM2 scale (get i3) (get j3) addScaled3 i a jx jy jz = do let scale new old = a * new + old i1=i; i2=succ i1; i3=succ i2; set i1 =<< liftM (scale jx) (get i1) set i2 =<< liftM (scale jy) (get i2) set i3 =<< liftM (scale jz) (get i3) -- This is the main code. Essentially all the time is spent here advance n = when (n>0) $ do let {-# NOINLINE updateVel #-} updateVel i = when (i <= nbodies) $ do let i' = (.|. sh i) im <- get (i' m) ix <- get (i' x) iy <- get (i' y) iz <- get (i' z) ivx <- get (i' vx) ivy <- get (i' vy) ivz <- get (i' vz) let {-# INLINE updateVel' #-} updateVel' ivx ivy ivz j = ivx `seq` ivy `seq` ivz `seq` if j > nbodies then do set (i' vx) ivx set (i' vy) ivy set (i' vz) ivz else do let j' = (.|. sh j) jm <- get (j' m) dx <- liftM (ix-) (get (j' x)) dy <- liftM (iy-) (get (j' y)) dz <- liftM (iz-) (get (j' z)) let distance = sqrt (dx*dx+dy*dy+dz*dz) mag = dt / (distance * distance * distance) addScaled3 (vel j) ( im*mag) dx dy dz let a = -jm*mag ivx' = ivx+a*dx ivy' = ivy+a*dy ivz' = ivz+a*dz updateVel' ivx' ivy' ivz' $! (j+1) updateVel' ivx ivy ivz $! (i+1) addScaled (pos i) dt (vel i) updateVel (i+1) updateVel 0 advance (pred n)
10 Old attempt by ChrisKuklewicz
I heavily transformed Don's code. This now runs almost exactly as fast as Einar's code on my machine. So I am niether happy nor sad.
Note that this creates the {{{data MutVec = V ...}}} and {{{data Body = B}}} all at the beginning, puts them into an array, and never constructs any new ones. From them on it just looks up {{{IORef Double}}}'s and works on those.
I cannot find any Haskell idioms for making this run 10x faster, but other languages manage to do it, such as OCaml.
Would peeking and poking unboxed Doubles be the way to go?
-- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Christoph Bauer -- Rewritten in Haskell from C by Don Stewart -- Rewritten again by Chris Kuklewicz -- -- -O2 -optc-O3 -funbox-strict-fields -fglasgow-exts -fexcess-precision -fasm -- -- Runs at the same speed as the Haskell#3 open mutable records code -- by Einar. -- -- Advance is where ALL the work is done and where any optimization must go. -- import System import Monad import Data.List import Data.Array.Array import Text.Printf import Data.IORef default() type Bodies = Array Int Body data MutVec = V { x, y, z :: !(IORef Double) } data Body = B { mass :: !Double, pos,vel :: !MutVec } newV x y z = x `seq` y `seq` z `seq` do liftM3 V (newIORef x) (newIORef y) (newIORef z) mkB m x y z vx vy vz = liftM2 (B (m*solar_mass)) (newV x y z) (newV (vx*days_per_year) (vy*days_per_year) (vz*days_per_year)) {-# INLINE mIORef #-} mIORef r f = readIORef r >>= ( ($!) (writeIORef r) ) . f {-# INLINE minus #-} minus x = (\y -> y-x) {-# INLINE do2 #-} do2 fm m1 m2 = do x1 <- m1; x2 <- m2; fm x1 x2 {-# INLINE addScaled #-} addScaled v1@(V x1 y1 z1) a v2@(V x2 y2 z2) = do (mIORef x1).(+).(*a) =<< readIORef x2 (mIORef y1).(+).(*a) =<< readIORef y2 (mIORef z1).(+).(*a) =<< readIORef z2 {-# INLINE setDiff #-} setDiff d@(V dx dy dz) v1@(V x1 y1 z1) v2@(V x2 y2 z2) = do do2 (\a b->writeIORef dx $! (a-b)) (readIORef x1) (readIORef x2) do2 (\a b->writeIORef dy $! (a-b)) (readIORef y1) (readIORef y2) do2 (\a b->writeIORef dz $! (a-b)) (readIORef z1) (readIORef z2) return d {-# INLINE kineticE #-} kineticE mass (V x y z) = do vx <- readIORef x ; vy <- readIORef y ; vz <- readIORef z return $! 0.5 * mass * (vx*vx + vy*vy + vz*vz) {-# INLINE magV #-} magV (V x y z) = do liftM3 (\a b c->sqrt (a*a+b*b+c*c)) (readIORef x) (readIORef y) (readIORef z) >>= (return $!) {-# INLINE potentialE #-} potentialE mass1 (V x1 y1 z1) mass2 (V x2 y2 z2) = do dx <- liftM2 (-) (readIORef x1) (readIORef x2) dy <- liftM2 (-) (readIORef y1) (readIORef y2) dz <- liftM2 (-) (readIORef z1) (readIORef z2) return $! ((-1)*mass1*mass2/sqrt (dx*dx + dy*dy + dz*dz)) mkBodies :: IO Bodies mkBodies = liftM (listArray (0,nbodies-1)) $ sequence bodiesData -- sun jupiter saturn uranus neptune bodiesData = [ mkB 1 0 0 0 0 0 0 ,mkB 9.54791938424326609e-04 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03) ( 7.69901118419740425e-03) (-6.90460016972063023e-05) ,mkB 2.85885980666130812e-04 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) ( 4.99852801234917238e-03) ( 2.30417297573763929e-05) ,mkB 4.36624404335156298e-05 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03) ( 2.37847173959480950e-03) (-2.96589568540237556e-05) ,mkB 5.15138902046611451e-05 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03) ( 1.62824170038242295e-03) (-9.51592254519715870e-05) ] solar_mass = 4 * pi * pi days_per_year = 365.24 nbodies = length bodiesData offsetMomentum :: Bodies -> IO () offsetMomentum arr = sequence_ [ addScaled sv (-m/sm) v | (B m _ v) <- tail (elems arr) ] where (B sm sp sv) = (arr ! 0) energy :: Int -> Bodies -> IO Double energy nbodies arr = loop 0 0 where loop i e | i >= nbodies = return e | otherwise = do let (B mb bp bv) = arr ! i ke <- kineticE mb bv (loop' (i+1) bp mb $! (e+ke)) >>= loop (i+1) loop' j bp mb e | j >= nbodies = return e | otherwise = do let (B mb2 bp2 _) = arr ! j pe <- potentialE mb bp mb2 bp2 loop' (j+1) bp mb (e + pe) main = do (n::Int) <- getArgs >>= readIO . head bodies <- mkBodies dxyz <- newV 0 0 0 let dt = 0.01 advance = do let updateVel i = when (i < nbodies) $ do updateVel' (bodies ! i) (i+1) updateVel (i+1) updateVel' b1@(B mb1 bp1 bv1) j = if j >= nbodies then return () else do let (B mb2 bp2 bv2) = bodies ! j setDiff dxyz bp1 bp2 distance <- magV dxyz let mag = dt / (distance * distance * distance) addScaled bv2 (mb1*mag) dxyz addScaled bv1 (-mb2*mag) dxyz updateVel' b1 (j+1) updatePos i = when (i < nbodies) $ do let (B _ bp bv) = bodies ! i addScaled bp dt bv updatePos (i+1) updateVel 0 updatePos 0 offsetMomentum bodies energy nbodies bodies >>= printf "%.9f\n" sequence_ . replicate n $ advance energy nbodies bodies >>= printf "%.9f\n"
11 Joel Koerwer's nth attempt
I suppose the comments say it all.
-- Joel Koerwer's attempt at the Computer Shootout nbody problem. -- Not as fast as Einar Kartunnen's Open Mutable Records implementation, -- but I like it! -- I get about double the runtime of Einar's code. import System (getArgs) import Text.Printf (printf) data Vector3 = V3 !Double !Double !Double deriving (Show, Eq) instance Num Vector3 where (V3 x y z) + (V3 u v w) = V3 (x+u) (y+v) (z+w) (V3 x y z) - (V3 u v w) = V3 (x-u) (y-v) (z-w) (V3 x y z) * (V3 u v w) = V3 (x*u) (y*v) (z*w) fromInteger n = V3 (fromInteger n) (fromInteger n) (fromInteger n) abs (V3 x y z) = V3 (abs x) (abs y) (abs z) signum (V3 x y z) = V3 (signum x) (signum y) (signum z) -- Mnemonically read "scalar times vector". (.*|) :: Double -> Vector3 -> Vector3 a .*| (V3 x y z) = V3 (a*x) (a*y) (a*z) -- Similarly for "vector divided by scalar". (|/.) :: Vector3 -> Double -> Vector3 (V3 x y z) |/. a = V3 (x/a) (y/a) (z/a) dot :: Vector3 -> Vector3 -> Double (V3 x y z) `dot` (V3 u v w) = x*u + y*v + z*w norm2 v = v `dot` v norm = sqrt . norm2 type PhaseSpace = ([Vector3],[Vector3]) advance :: PhaseSpace -> PhaseSpace advance sys@(rs,vs) = let vs' = zipWith (+) vs $ getDeltaVs sys rs' = zipWith ((+) . (dt .*| )) vs' rs in (rs', vs') getDeltaVs :: PhaseSpace -> [Vector3] getDeltaVs sys@(rs,_) = let timesMdT = zipWith (.*|) dtms mags = getMags rs in map (sum . timesMdT) mags -- getMags does the "real work". Given a list of position vectors [r0,r1,..,rN] -- return the "magnitude matrix" -- -- [[ 0 m01 m02 .. m0N], -- [-m01 0 m12 .. m1N], -- [-m02 -m12 0 .. m2N], -- [ : : . : ], -- [ : : . : ], -- [-m0N -m1N -m2N .. 0 ]] -- -- where mij = -mji = (rj - ri) / |rj - ri|^3. -- -- This method is faster than calulating all differences -- seperately, but not by a whole lot. getMags :: [Vector3] -> [[Vector3]] getMags [] = [] getMags (r:rs) = let mkMag r' = let d = r' - r in d |/. (norm d * norm2 d) newMags = map mkMag rs minusMags = map negate newMags in (0 : newMags) : zipWith (:) minusMags (getMags rs) energy :: PhaseSpace -> Double energy sys = kineticEnergy sys + potentialEnergy sys -- T = 0.5 * sum_i mi * vi^2 kineticEnergy :: PhaseSpace -> Double kineticEnergy (_,vs) = 0.5 * (sum $ zipWith (*) ms (map norm2 vs)) -- U = sum_{i/=j} -0.5*mi*mj/|ri-rj| potentialEnergy :: PhaseSpace -> Double potentialEnergy (rs,_) = let a = -0.5 :: Double in (a *) . sum $ zipWith ((*) . sum . zipWith (*) ms) (invDists rs) ms -- Calculate the inverse distances between positions [r0,..rN]. -- Similar to getMags, but here, mij = |mij| = 1 / |ri-rj|. invDists :: [Vector3] -> [[Double]] invDists [] = [] invDists (r:rs) = let newDs = map ((1/) . norm . (r -)) rs in (0 : newDs) : zipWith (:) newDs (invDists rs) -- offset gives the Sun an initial velocity such that the entire system -- has zero overall momentum. offset :: PhaseSpace -> PhaseSpace offset (rs, vs@(vSun:vPlanets)) = let pTotal = sum $ zipWith (.*|) ms vs mSun = head ms dv = pTotal |/. mSun in (rs, vSun-dv : vPlanets) ---------- A strict version of iterate for our loop ----------- seqList [] y = y seqList (x:xs) y = x `seq` seqList xs y seqTuple (x,y) z = seqList x $ seqList y z iterate' f a = let b = f a in seqTuple b $ a : iterate' f b --------------------------------------------------------------- main = let sys = offset initialPhaseSpace print9 = printf "%.9f\n" in do [n'] <- getArgs let n = read n' :: Int print9 (energy sys) print9 . energy $ iterate' advance sys !! n -- I've stuck all the ugly constants and initial conditions at the end. dt = 0.01 solarMass = 4*pi*pi daysPerYear = 365.24 posns =[V3 0 0 0, V3 4.8414314424647209 (-1.16032004402742839) (-1.03622044471123109e-01), V3 8.34336671824457987 4.12479856412430479 (-4.03523417114321381e-01), V3 12.8943695621391310 (-15.1111514016986312) (-2.23307578892655734e-1), V3 15.3796971148509165 (-25.9193146099879641) (1.79258772950371181e-1) ] vels' =[ V3 0 0 0, V3 1.66007664274403694e-3 7.69901118419740425e-3 (-6.90460016972063023e-5), V3 (-2.76742510726862411e-3) 4.99852801234917238e-3 2.30417297573763929e-5, V3 2.96460137564761618e-3 2.37847173959480950e-3 (-2.96589568540237556e-5), V3 2.68067772490389322e-3 1.62824170038242295e-3 (-9.51592254519715870e-5) ] vels = map (daysPerYear .*|) vels' initialPhaseSpace = (posns, vels) dtms = map (dt *) ms -- The masses are only used by themselves in energy, so go -- ahead and precalculate m*dt. ms = map (solarMass *) [1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05]
12 An imperative entry
Based on the C version. Too many slow updates on the array still.
{-# OPTIONS -O2 -optc-O3 -funbox-strict-fields -fglasgow-exts #-} -- -- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Christoph Bauer -- Rewritten in Haskell from C by Don Stewart -- -- Advance is still too slow. -- import System import Monad import Data.Array.IO import Data.Array.Base import Text.Printf data Planet = P { x, y, z, vx, vy, vz :: !Double } deriving Show mass = [solar_mass ,9.54791938424326609e-04 * solar_mass ,2.85885980666130812e-04 * solar_mass ,4.36624404335156298e-05 * solar_mass ,5.15138902046611451e-05 * solar_mass] bodies =[P 0 0 0 0 0 0 -- sun -- jupiter ,P 4.84143144246472090e+00 -- jupiter (-1.16032004402742839e+00) (-1.03622044471123109e-01) ( 1.66007664274403694e-03 * days_per_year) ( 7.69901118419740425e-03 * days_per_year) (-6.90460016972063023e-05 * days_per_year) ,P 8.34336671824457987e+00 -- saturn 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03 * days_per_year) ( 4.99852801234917238e-03 * days_per_year) ( 2.30417297573763929e-05 * days_per_year) ,P 1.28943695621391310e+01 -- uranus (-1.51111514016986312e+01) (-2.23307578892655734e-01) ( 2.96460137564761618e-03 * days_per_year) ( 2.37847173959480950e-03 * days_per_year) (-2.96589568540237556e-05 * days_per_year) ,P 1.53796971148509165e+01 -- neptune (-2.59193146099879641e+01) 1.79258772950371181e-01 ( 2.68067772490389322e-03 * days_per_year) ( 1.62824170038242295e-03 * days_per_year) (-9.51592254519715870e-05 * days_per_year) ] solar_mass = 4 * pi * pi :: Double days_per_year = 365.24 nbodies = 5 offsetMomentum :: Int -> IOArray Int Planet -> IOUArray Int Double -> IO () offsetMomentum nbodies arr mass = do let loop i px py pz | i >= nbodies = return (px,py,pz) | otherwise = do p <- unsafeRead arr i mp <- unsafeRead mass i let px' = px + vx p * mp py' = py + vy p * mp pz' = pz + vz p * mp loop (i+1) px' py' pz' (px,py,pz) <- loop 0 0.0 0.0 0.0 p <- unsafeRead arr 0 unsafeWrite arr 0 $! p { vx = - px / solar_mass , vy = - py / solar_mass , vz = - pz / solar_mass } energy :: Int -> IOArray Int Planet -> IOUArray Int Double -> IO Double energy nbodies arr mass = loop 0 (0.0 :: Double) where loop i e | i >= nbodies = return e | otherwise = do b <- unsafeRead arr i mb<- unsafeRead mass i let (x,y,z) = (vx b, vy b, vz b) e' = e + 0.5 * mb * (x*x + y*y + z*z) e'' <- loop' (i+1) e' b mb loop (i+1) e'' loop' j e b mb | j >= nbodies = return e | otherwise = do b2 <- unsafeRead arr j mb2<- unsafeRead mass j let dx = x b - x b2 dy = y b - y b2 dz = z b - z b2 distance = sqrt $! dx * dx + dy * dy + dz * dz loop' (j+1) (e - ((mb * mb2) / distance)) b mb advance :: Int -> IOArray Int Planet -> IOUArray Int Double -> Double -> IO () advance nbodies arr mass dt = do for0 0 for3 0 where for0 i = when (i < nbodies) $ do b <- unsafeRead arr i mb <- unsafeRead mass i for1 (i+1) mb b >>= unsafeWrite arr i for0 (i+1) for1 j mb b = if j >= nbodies then return b else do b2 <- unsafeRead arr j mb2<- unsafeRead mass j let dx = x b - x b2 dy = y b - y b2 dz = z b - z b2 distance = sqrt $! dx * dx + dy * dy + dz * dz mag = dt / (distance * distance * distance) (m1,m2) = (mb2 * mag, mb * mag) unsafeWrite arr j $! b2 { vx = vx b2 + dx * m2 , vy = vy b2 + dy * m2 , vz = vz b2 + dz * m2 } for1 (j+1) mb $! b { vx = vx b - dx * m1 , vy = vy b - dy * m1 , vz = vz b - dz * m1 } for3 i = when (i < nbodies) $ do b <- unsafeRead arr i unsafeWrite arr i $! b { x = x b + dt * vx b , y = y b + dt * vy b , z = z b + dt * vz b } for3 (i+1) main = do (n::Int) <- getArgs >>= readIO . head arr <- newListArray (0,nbodies-1) bodies :: IO (IOArray Int Planet) massarr <- newListArray (0,nbodies-1) mass :: IO (IOUArray Int Double) offsetMomentum nbodies arr massarr energy nbodies arr massarr >>= printf "%.9f\n" sequence_ . replicate n $ advance nbodies arr massarr 0.01 energy nbodies arr massarr >>= printf "%.9f\n"
13 Original entry
This is the "Haskell GHC #3" entry, the fastest of the three.
-- The Great Computer Language Shootout -- http://shootout.alioth.debian.org/ -- NBody based on Open Mutable Records -- Contributed by Einar Karttunen import Control.Monad.Reader import Data.IORef import System import Text.Printf -- This is a monad for open mutable records programming newtype OO t r = OO (ReaderT t IO r) deriving(Monad, MonadReader t, MonadIO) -- run a computation with a record (object) with :: s -> OO s a -> OO b a with this (OO c) = liftIO (runReaderT c this) -- run an OO computation producing an IO value. ooToIO :: OO s a -> IO a ooToIO (OO c) = runReaderT c undefined -- the plain record types data a :.: r = RC !a !r infixr :.: data END = END -- Next we define a field access method. class Select r f t | r f -> t where (!) :: r -> f -> Ref t instance Select (Field f t :.: r) f t where (!) (RC (F x) _) _ = x instance Select r f t => Select (a :.: r) f t where (!) (RC _ t) = (!) t -- And finally the type of mutable fields. type Ref a = IORef a newtype Field name rtype = F (Ref rtype) -- Next we define a way to construct record values. infixr ## (##) :: v -> OO s r -> OO s ((Field f v) :.: r) (##) v r = do { h <- liftIO (newIORef v); t <- r; return (RC (F h) t) } end = return END :: OO s END -- Get the value of a field. value :: Select s f t => f -> OO s t value a = do x <- asks (\s -> s!a) liftIO (readIORef x) -- Or set the value of a field. (<-:) :: Select s f t => f -> t -> OO s () a <-: b = do x <- asks (\s -> s!a) liftIO (writeIORef x b) -- And as a convenience add value to an double field. (+=) :: Select s f V3 => f -> V3 -> OO s V3 a += b = do x <- asks (\s -> s!a) val <- liftIO (readIORef x) let z = val+b z `seq` liftIO (writeIORef x z) return z -- V3 vector like things. data V3 = V3 !Double !Double !Double deriving(Show,Eq) instance Num V3 where (V3 x1 y1 z1) + (V3 x2 y2 z2) = V3 (x1+x2) (y1+y2) (z1+z2) (V3 x1 y1 z1) - (V3 x2 y2 z2) = V3 (x1-x2) (y1-y2) (z1-z2) (V3 x1 y1 z1) * (V3 x2 y2 z2) = V3 (x1*x2) (y1*y2) (z1*z2) fromInteger o = let v = fromInteger o in V3 v v v instance Fractional V3 where (V3 x1 y1 z1) / (V3 x2 y2 z2) = V3 (x1/x2) (y1/y2) (z1/z2) from field obj = with obj (value field) dv3 v = V3 v v v v3len (V3 x y z) = sqrt (x*x + y*y + z*z) {-# RULE "dv3/mult" forall a b. dv3 a * (V3 x y z) = V3 (a*x) (a*y) (a*z) #-} -- Bodies data Base = Base; data Diff = Diff; data Mass = Mass type Body = Field Base V3 :.: Field Diff V3 :.: Field Mass Double :.: END -- Create a new body newBody x1 y1 z1 x2 y2 z2 mass = V3 x1 y1 z1 ## (dv3 year * V3 x2 y2 z2) ## (mass * solarMass) ## end :: OO s Body offsetMomemtum bodies@(b:_) = foldM comp 0 bodies >>= (\d -> with b (Diff <-: (-d / dv3 solarMass))) where comp old body = do diff <- Diff `from` body mass <- Mass `from` body return (old + (dv3 mass * diff)) advance dt bodies = advanceLoop dt bodies >> mapM_ comp bodies where comp body = with body ((\diff -> Base += (dv3 dt * diff)) =<< value Diff) advanceLoop :: Double -> [Body] -> OO s () advanceLoop dt [] = return () advanceLoop dt (b:bs) = do bmass <- Mass `from` b bbase <- Base `from` b let inner [] = return () inner (p:ps) = do diff <- (Base `from` p >>= \pbase -> return (bbase - pbase)) let dist = v3len diff let magdiff = dv3 (dt / (dist * dist * dist)) * diff pmass <- Mass `from` p with b (Diff += (dv3 pmass * (-magdiff))) with p (Diff += (dv3 bmass * magdiff)) inner ps inner bs >> advanceLoop dt bs energy :: [Body] -> Double -> OO s Double energy [] e = return e energy (p:ps) e = do pdiff@(V3 x y z) <- Diff `from` p pmass <- Mass `from` p pbase <- Base `from` p let e' = e + 0.5 * pmass * (x*x + y*y + z*z) let inner [] e = return e inner (j:js) e = do jmass <- Mass `from` j jbase <- Base `from` j inner js $! (e - ((pmass*jmass)/v3len(pbase - jbase))) inner ps e' >>= energy ps solarMass = 4*pi*pi :: Double year = 365.24 :: Double createBodies = do b0 <- newBody 0 0 0 0 0 0 1 b1 <- newBody 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01) (1.66007664274403694e-03) (7.69901118419740425e-03) (-6.90460016972063023e-05) 9.54791938424326609e-04 b2 <- newBody 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01) (-2.76742510726862411e-03) (4.99852801234917238e-03) (2.30417297573763929e-05) (2.85885980666130812e-04) b3 <- newBody 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01) 2.96460137564761618e-03 2.37847173959480950e-03 (-2.96589568540237556e-05) 4.36624404335156298e-05 b4 <- newBody 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01 2.68067772490389322e-03 1.62824170038242295e-03 (-9.51592254519715870e-05) 5.15138902046611451e-05 return [b0,b1,b2,b3,b4] main = do ~[n] <- getArgs (e1,e2) <- ooToIO (do bodies <- createBodies offsetMomemtum bodies e1 <- energy bodies 0 sequence_ $ replicate (read n) $ advance 0.01 bodies e2 <- energy bodies 0 return (e1,e2)) printf "%.9f\n%.9f\n" e1 e2
13.1 Smaller code
Anyone interested in seeing how small we can make the code? Here's my original attempt, which unfortunately suffers from a massive space-leak. I'd be interested in seeing if it is possible to have a succinct version without leaks.
import System(getArgs) import Numeric import Data.List data Vec = V !Double !Double !Double deriving Show instance Num Vec where (V a b c) + (V x y z) = (V (a+x) (b+y) (c+z)) (V a b c) - (V x y z) = (V (a-x) (b-y) (c-z)) fromInteger 0 = V 0.0 0.0 0.0 -- for sum function instance Eq Vec where (V a b c) == (V x y z) = (a==x) && (b==y) && (c==z) dot (V a b c) (V x y z) = a*x + b*y + c*z scale (V a b c) n = V (n*a) (n*b) (n*c) mag (V x y z) = sqrt (x*x + y*y + z*z) data Planet = Planet Vec Vec Double deriving Show --Position Velocity Mass dist (Planet p1 _ _) (Planet p2 _ _) = mag $ p1 - p2 mass (Planet _ _ m) = m vel (Planet _ v _) = v pos (Planet p _ _) = p main = do [arg] <- getArgs let iter = read arg let n = 5::Int let bodies = offset_momentum n [sun, jupiter, saturn, neptune, uranus] let begin = energy n bodies putStrLn $ showFFloat (Just 9) begin "" let final = (iterate (advance n 0.01) bodies) let end = energy n (final !! iter) putStrLn $ showFFloat (Just 9) end "" days_per_year = 365.24 solar_mass = 4.0 * pi * pi advance :: Int -> Double -> [Planet] -> [Planet] advance n dt (p:ps) = take n ((Planet (pos p + delta_x) (new_v) (mass p) ) : advance n dt (ps++[p])) where delta_v = sum (map (\q -> (pos p - pos q) `scale` ((mass q)*dt/(dist p q)^3)) ps) new_v = (vel p) - delta_v delta_x = new_v `scale` dt energy:: Int -> [Planet] -> Double energy n ps = kinetic - potential where kinetic = 0.5 * (sum (map (\q->(mass q)*((vel q) `dot` (vel q))) ps)) potential = sum $ zipWith grav_pot ps (tail (init (tails ps))) grav_pot x xs = sum $ map (\y -> (mass x)*(mass y)/(dist x y)) xs offset_momentum n ((Planet p v m):ps) = (Planet p new_v m):ps where new_v = (sum (map (\n->(vel n) `scale` (mass n)) ps)) `scale` ((-1.0)/solar_mass) jupiter = (Planet (V 4.84143144246472090e+00 (-1.16032004402742839e+00) (-1.03622044471123109e-01)) (V ( 1.66007664274403694e-03 * days_per_year) ( 7.69901118419740425e-03 * days_per_year) ((-6.90460016972063023e-05) * days_per_year)) (9.54791938424326609e-04 * solar_mass)) saturn = (Planet (V 8.34336671824457987e+00 4.12479856412430479e+00 (-4.03523417114321381e-01)) (V (-2.76742510726862411e-03 * days_per_year) (4.99852801234917238e-03 * days_per_year) (2.30417297573763929e-05 * days_per_year)) (2.85885980666130812e-04 * solar_mass)) uranus = (Planet (V 1.28943695621391310e+01 (-1.51111514016986312e+01) (-2.23307578892655734e-01)) (V (2.96460137564761618e-03 * days_per_year) (2.37847173959480950e-03 * days_per_year) (-2.96589568540237556e-05 * days_per_year)) (4.36624404335156298e-05 * solar_mass)) neptune = (Planet (V 1.53796971148509165e+01 (-2.59193146099879641e+01) 1.79258772950371181e-01) (V (2.68067772490389322e-03 * days_per_year) (1.62824170038242295e-03 * days_per_year) (-9.51592254519715870e-05 * days_per_year)) (5.15138902046611451e-05 * solar_mass)) sun = (Planet (V 0.0 0.0 0.0) (V 0.0 0.0 0.0) solar_mass)
