[commit: ghc] master: Avoid accumulating slop in the pinned_object_block. (cc2ea98)
Simon Marlow
marlowsd at gmail.com
Thu Apr 14 11:31:23 CEST 2011
Repository : ssh://darcs.haskell.org//srv/darcs/ghc
On branch : master
http://hackage.haskell.org/trac/ghc/changeset/cc2ea98ac4a15e40a15e89de9e47f33e191ba393
>---------------------------------------------------------------
commit cc2ea98ac4a15e40a15e89de9e47f33e191ba393
Author: Simon Marlow <marlowsd at gmail.com>
Date: Thu Apr 14 08:59:39 2011 +0100
Avoid accumulating slop in the pinned_object_block.
The pinned_object_block is where we allocate small pinned ByteArray#
objects. At a GC the pinned_object_block was being treated like other
large objects and promoted to the next step/generation, even if it was
only partly full. Under some ByteString-heavy workloads this would
accumulate on average 2k of slop per GC, and this memory is never
released until the ByteArray# objects in the block are freed.
So now, we keep allocating into the pinned_object_block until it is
completely full, at which point it is handed over to the GC as before.
The pinned_object_block might therefore contain objects with a large
range of ages, but I don't think this is any worse than the situation
before. We still have the fragmentation issue in general, but the new
scheme can improve the memory overhead for some workloads
dramatically.
>---------------------------------------------------------------
rts/Stats.c | 9 +++++++++
rts/sm/GC.c | 5 -----
rts/sm/Sanity.c | 4 ++++
rts/sm/Storage.c | 25 ++++++++++++++++++++-----
4 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/rts/Stats.c b/rts/Stats.c
index 159a909..3e7b5d8 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -798,6 +798,15 @@ statDescribeGens(void)
mut = 0;
for (i = 0; i < n_capabilities; i++) {
mut += countOccupied(capabilities[i].mut_lists[g]);
+
+ // Add the pinned object block.
+ bd = capabilities[i].pinned_object_block;
+ if (bd != NULL) {
+ gen_live += bd->free - bd->start;
+ gen_blocks += bd->blocks;
+ }
+
+ gen_live += gcThreadLiveWords(i,g);
gen_live += gcThreadLiveWords(i,g);
gen_blocks += gcThreadLiveBlocks(i,g);
}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 05bc8f2..3036140 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -597,11 +597,6 @@ GarbageCollect (rtsBool force_major_gc,
// update the max size of older generations after a major GC
resize_generations();
- // Start a new pinned_object_block
- for (n = 0; n < n_capabilities; n++) {
- capabilities[n].pinned_object_block = NULL;
- }
-
// Free the mark stack.
if (mark_stack_top_bd != NULL) {
debugTrace(DEBUG_gc, "mark stack: %d blocks",
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8ebb9a2..0ec552c 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -789,6 +789,7 @@ findMemoryLeak (void)
for (i = 0; i < n_capabilities; i++) {
markBlocks(nurseries[i].blocks);
+ markBlocks(capabilities[i].pinned_object_block);
}
#ifdef PROFILING
@@ -880,6 +881,9 @@ memInventory (rtsBool show)
for (i = 0; i < n_capabilities; i++) {
ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks);
nursery_blocks += nurseries[i].n_blocks;
+ if (capabilities[i].pinned_object_block != NULL) {
+ nursery_blocks += capabilities[i].pinned_object_block->blocks;
+ }
}
retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ae3433a..f8a9e55 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -657,17 +657,32 @@ allocatePinned (Capability *cap, lnat n)
// If we don't have a block of pinned objects yet, or the current
// one isn't large enough to hold the new object, allocate a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+ // The pinned_object_block remains attached to the capability
+ // until it is full, even if a GC occurs. We want this
+ // behaviour because otherwise the unallocated portion of the
+ // block would be forever slop, and under certain workloads
+ // (allocating a few ByteStrings per GC) we accumulate a lot
+ // of slop.
+ //
+ // So, the pinned_object_block is initially marked
+ // BF_EVACUATED so the GC won't touch it. When it is full,
+ // we place it on the large_objects list, and at the start of
+ // the next GC the BF_EVACUATED flag will be cleared, and the
+ // block will be promoted as usual (if anything in it is
+ // live).
ACQUIRE_SM_LOCK;
- cap->pinned_object_block = bd = allocBlock();
- dbl_link_onto(bd, &g0->large_objects);
- g0->n_large_blocks++;
+ if (bd != NULL) {
+ dbl_link_onto(bd, &g0->large_objects);
+ g0->n_large_blocks++;
+ g0->n_new_large_words += bd->free - bd->start;
+ }
+ cap->pinned_object_block = bd = allocBlock();
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
- bd->flags = BF_PINNED | BF_LARGE;
+ bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
bd->free = bd->start;
}
- g0->n_new_large_words += n;
p = bd->free;
bd->free += n;
return p;
More information about the Cvs-ghc
mailing list