[commit: ghc] master: Reduce fragmentation when using +RTS -H (with or without a size) (a68df77)
Simon Marlow
marlowsd at gmail.com
Tue Aug 21 13:23:17 CEST 2012
Repository : ssh://darcs.haskell.org//srv/darcs/ghc
On branch : master
http://hackage.haskell.org/trac/ghc/changeset/a68df77ede928e6c7790dacb5925625792a904d3
>---------------------------------------------------------------
commit a68df77ede928e6c7790dacb5925625792a904d3
Author: Simon Marlow <marlowsd at gmail.com>
Date: Tue Aug 21 11:39:06 2012 +0100
Reduce fragmentation when using +RTS -H (with or without a size)
>---------------------------------------------------------------
rts/sm/BlockAlloc.c | 35 +++++++++++++++++++++++++++++++++++
rts/sm/BlockAlloc.h | 2 ++
rts/sm/Storage.c | 10 ++++++++--
3 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 8a1cfab..9fd3ef5 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -389,6 +389,41 @@ finish:
return bd;
}
+//
+// Allocate a chunk of blocks that is at most a megablock in size.
+// This API is used by the nursery allocator that wants contiguous
+// memory preferably, but doesn't require it. When memory is
+// fragmented we might have lots of large chunks that are less than a
+// full megablock, so allowing the nursery allocator to use these
+// reduces fragmentation considerably. e.g. on a GHC build with +RTS
+// -H, I saw fragmentation go from 17MB down to 3MB on a single compile.
+//
+bdescr *
+allocLargeChunk (void)
+{
+ bdescr *bd;
+ nat ln;
+
+ ln = 5; // start in the 32-63 block bucket
+ while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
+ ln++;
+ }
+ if (ln == MAX_FREE_LIST) {
+ return allocGroup(BLOCKS_PER_MBLOCK);
+ }
+ bd = free_list[ln];
+
+ n_alloc_blocks += bd->blocks;
+ if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
+
+ dbl_link_remove(bd, &free_list[ln]);
+ initGroup(bd);
+
+ IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
+ IF_DEBUG(sanity, checkFreeListSanity());
+ return bd;
+}
+
bdescr *
allocGroup_lock(nat n)
{
diff --git a/rts/sm/BlockAlloc.h b/rts/sm/BlockAlloc.h
index f8b4204..d26bb24 100644
--- a/rts/sm/BlockAlloc.h
+++ b/rts/sm/BlockAlloc.h
@@ -11,6 +11,8 @@
#include "BeginPrivate.h"
+bdescr *allocLargeChunk (void);
+
/* Debugging -------------------------------------------------------------- */
extern nat countBlocks (bdescr *bd);
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index cadaf4d..6b32593 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -437,10 +437,16 @@ allocNursery (bdescr *tail, nat blocks)
// tiny optimisation (~0.5%), but it's free.
while (blocks > 0) {
- n = stg_min(blocks, BLOCKS_PER_MBLOCK);
+ if (blocks >= BLOCKS_PER_MBLOCK) {
+ bd = allocLargeChunk(); // see comment with allocLargeChunk()
+ n = bd->blocks;
+ } else {
+ bd = allocGroup(blocks);
+ n = blocks;
+ }
+
blocks -= n;
- bd = allocGroup(n);
for (i = 0; i < n; i++) {
initBdescr(&bd[i], g0, g0);
More information about the Cvs-ghc
mailing list