Skip to content

Commit

Permalink
free more thread state in jl_delete_thread and GC (JuliaLang#52198)
Browse files Browse the repository at this point in the history
This prevents most of the memory growth seen in workloads that start many
foreign threads. In the future, we could do even better by moving the pages
in the heap of an exited thread (and maybe also its pooled stacks) elsewhere
so they can be reused, and by then freeing the TLS object itself as well.
  • Loading branch information
vtjnash authored and tecosaur committed Mar 4, 2024
2 parents ca40c4d + 4840020 commit 9420345
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ void sweep_stack_pools(void)
assert(gc_n_threads);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 == NULL)
continue;

// free half of stacks that remain unused since last sweep
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
Expand All @@ -223,6 +225,12 @@ void sweep_stack_pools(void)
void *stk = small_arraylist_pop(al);
free_stack(stk, pool_sizes[p]);
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(al);
}
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(ptls2->heap.free_stacks);
}

small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
Expand Down
43 changes: 43 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1746,6 +1746,19 @@ void gc_free_pages(void)
}
}

// Drain `pgstack` until pop_lf_back() yields NULL. Each page popped is handed
// to jl_gc_free_page(), GC_PAGE_SZ is subtracted from gc_heap_stats.heap_size,
// and the page metadata is then pushed onto `global_page_pool_freed`.
void gc_move_to_global_page_pool(jl_gc_page_stack_t *pgstack)
{
    jl_gc_pagemeta_t *page;
    while ((page = pop_lf_back(pgstack)) != NULL) {
        jl_gc_free_page(page);
        // keep the heap-size accounting in step with the page just released
        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
        push_lf_back(&global_page_pool_freed, page);
    }
}

// setup the data-structures for a sweep over all memory pools
static void gc_sweep_pool(void)
{
Expand Down Expand Up @@ -3775,6 +3788,24 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
else {
ptls2->heap.remset->len = 0;
}
// free empty GC state for threads that have exited
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL &&
(ptls->tid < gc_first_tid || ptls2->tid >= gc_first_tid + jl_n_gcthreads)) {
jl_thread_heap_t *heap = &ptls2->heap;
if (heap->weak_refs.len == 0)
small_arraylist_free(&heap->weak_refs);
if (heap->live_tasks.len == 0)
small_arraylist_free(&heap->live_tasks);
if (heap->remset->len == 0)
arraylist_free(heap->remset);
if (heap->last_remset->len == 0)
arraylist_free(heap->last_remset);
if (ptls2->finalizers.len == 0)
arraylist_free(&ptls2->finalizers);
if (ptls2->sweep_objs.len == 0)
arraylist_free(&ptls2->sweep_objs);
gc_move_to_global_page_pool(&ptls2->page_metadata_buffered);
}
}

#ifdef __GLIBC__
Expand Down Expand Up @@ -3993,6 +4024,18 @@ void jl_init_thread_heap(jl_ptls_t ptls)
jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
}

// Release the mark-queue storage owned by `ptls`:
//  - the work-stealing arrays behind the chunk queue and the pointer queue
//    are freed (in that order) and their `array` slots are reset to NULL;
//  - the `reclaim_set` arraylist is freed last.
void jl_free_thread_gc_state(jl_ptls_t ptls)
{
    jl_gc_markqueue_t *markq = &ptls->mark_queue;
    ws_queue_t *queues[2] = { &markq->chunk_queue, &markq->ptr_queue };
    for (int i = 0; i < 2; i++) {
        free_ws_array(jl_atomic_load_relaxed(&queues[i]->array));
        // clear the slot so the freed array is no longer reachable from the queue
        jl_atomic_store_relaxed(&queues[i]->array, NULL);
    }
    arraylist_free(&markq->reclaim_set);
}

// System-wide initializations
void jl_gc_init(void)
{
Expand Down
8 changes: 8 additions & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT;
void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT;

void jl_free_thread_gc_state(jl_ptls_t ptls);

static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
{
#ifndef _OS_WINDOWS_
Expand Down Expand Up @@ -508,6 +510,12 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
#else
pthread_mutex_unlock(&in_signal_lock);
#endif
free(ptls->bt_data);
small_arraylist_free(&ptls->locks);
ptls->previous_exception = NULL;
// allow the page root_task is on to be freed
ptls->root_task = NULL;
jl_free_thread_gc_state(ptls);
// then park in safe-region
(void)jl_gc_safe_enter(ptls);
}
Expand Down
6 changes: 6 additions & 0 deletions src/work-stealing-queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOT
return a;
}

// Release a work-stealing array: frees the element buffer first, then the
// array header itself.
//
// A NULL `a` is accepted and ignored, mirroring free(NULL). Callers that
// reset their array slot to NULL after freeing can therefore call this again
// on the cleared slot without dereferencing a null pointer.
static inline void free_ws_array(ws_array_t *a)
{
    if (a == NULL) {
        return;
    }
    free(a->buffer);
    free(a);
}

typedef struct {
_Atomic(int64_t) top;
char _padding[JL_CACHE_BYTE_ALIGNMENT - sizeof(_Atomic(int64_t))];
Expand Down

0 comments on commit 9420345

Please sign in to comment.