diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 951d1ef1c05f97..f12a5b894116f2 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -90,9 +90,9 @@ 89be445dd4936157533ad96bafb95f701430653a - + /~https://github.com/dotnet/emsdk - 50bf805c8b5ca52abd34fde390609d8a54640246 + 446eeb331fcbf2f48c14a377601a8ab950ec942e @@ -398,5 +398,9 @@ /~https://github.com/NuGet/NuGet.Client 8fef55f5a55a3b4f2c96cd1a9b5ddc51d4b927f8 + + /~https://github.com/dotnet/installer + dbeae1ac71d95355452952059f35960991cb3fd2 + diff --git a/eng/Versions.props b/eng/Versions.props index 113383aab30cf2..a8dc199cc94ed3 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -205,8 +205,6 @@ 2.46.3 2.45.0 2.45.0 - - 8.0.100-rc.1.23415.5 1.1.2-beta1.23323.1 8.0.0-preview-20230918.1 @@ -240,7 +238,7 @@ Note: when the name is updated, make sure to update dependency name in eng/pipelines/common/xplat-setup.yml like - DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-8_0_100_Transport --> - 8.0.0-rtm.23469.3 + 8.0.0-rtm.23470.1 $(MicrosoftNETWorkloadEmscriptenCurrentManifest80100TransportVersion) 1.1.87-gba258badda @@ -257,5 +255,8 @@ 3.1.7 1.0.406601 + + 8.0.100-rc.2.23470.7 + $(MicrosoftDotnetSdkInternalVersion) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index daeadfe9821b8c..7351954070725e 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -823,6 +823,11 @@ class t_join join_struct.r_join_lock = n_th; } + int get_num_threads() + { + return join_struct.n_threads; + } + void destroy () { dprintf (JOIN_LOG, ("Destroying join structure")); @@ -887,6 +892,8 @@ class t_join // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (color == join_struct.lock_color.LoadWithoutBarrier()) { + dprintf (9999, ("---h%d %d j%d %d - respin!!! 
(c:%d-%d)", + gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); goto respin; } @@ -1117,6 +1124,25 @@ t_join bgc_t_join; } \ } +#define spin_and_wait(count_to_spin, expr) \ +{ \ + while (!expr) \ + { \ + for (int j = 0; j < count_to_spin; j++) \ + { \ + if (expr) \ + { \ + break; \ + } \ + YieldProcessor (); \ + } \ + if (!(expr)) \ + { \ + GCToOSInterface::YieldThread (0); \ + } \ + } \ +} + #ifdef BACKGROUND_GC #define max_pending_allocs 64 @@ -1429,8 +1455,6 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) { #ifdef DYNAMIC_HEAP_COUNT uint64_t start = GetHighPrecisionTimeStamp(); - - msl->msl_wait_count++; #endif //DYNAMIC_HEAP_COUNT unsigned int i = 0; @@ -1485,7 +1509,7 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) #ifdef DYNAMIC_HEAP_COUNT uint64_t end = GetHighPrecisionTimeStamp(); Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); - dprintf (6666, ("wait for msl lock total time: %zd, total count: %zd, this time: %zd, this count: %u", msl->msl_wait_time, msl->msl_wait_count, end - start, i)); + dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); #endif //DYNAMIC_HEAP_COUNT } while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); @@ -2318,9 +2342,6 @@ sorted_table* gc_heap::seg_table; #ifdef MULTIPLE_HEAPS GCEvent gc_heap::ee_suspend_event; -#ifdef DYNAMIC_HEAP_COUNT -GCEvent gc_heap::gc_idle_thread_event; -#endif //DYNAMIC_HEAP_COUNT size_t gc_heap::min_gen0_balance_delta = 0; size_t gc_heap::min_balance_threshold = 0; #endif //MULTIPLE_HEAPS @@ -2919,6 +2940,12 @@ BOOL gc_heap::should_expand_in_full_gc = FALSE; #ifdef DYNAMIC_HEAP_COUNT int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; +uint64_t gc_heap::last_suspended_end_time = 0; +size_t gc_heap::gc_index_full_gc_end = 0; + +#ifdef STRESS_DYNAMIC_HEAP_COUNT +int gc_heap::heaps_in_this_gc = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT #endif // DYNAMIC_HEAP_COUNT // Provisional mode related stuff. @@ -6967,12 +6994,6 @@ BOOL gc_heap::create_thread_support (int number_of_heaps) { goto cleanup; } -#ifdef DYNAMIC_HEAP_COUNT - if (!gc_idle_thread_event.CreateOSManualEventNoThrow (FALSE)) - { - goto cleanup; - } -#endif //DYNAMIC_HEAP_COUNT if (!ee_suspend_event.CreateOSAutoEventNoThrow (FALSE)) { goto cleanup; @@ -7020,10 +7041,6 @@ bool gc_heap::create_gc_thread () return GCToEEInterface::CreateThread(gc_thread_stub, this, false, ".NET Server GC"); } -#ifdef DYNAMIC_HEAP_COUNT -static size_t prev_change_heap_count_gc_index; -#endif //DYNAMIC_HEAP_COUNT - #ifdef _MSC_VER #pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path #endif //_MSC_VER @@ -7042,18 +7059,87 @@ void gc_heap::gc_thread_function () if (heap_number == 0) { - uint32_t wait_result = gc_heap::ee_suspend_event.Wait(gradual_decommit_in_progress_p ? DECOMMIT_TIME_STEP_MILLISECONDS : INFINITE, FALSE); + bool wait_on_time_out_p = gradual_decommit_in_progress_p; + uint32_t wait_time = DECOMMIT_TIME_STEP_MILLISECONDS; +#ifdef DYNAMIC_HEAP_COUNT + // background_running_p can only change from false to true during suspension. 
+ if (!gc_heap::background_running_p () && dynamic_heap_count_data.should_change_heap_count)
+ {
+     assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);
+
+     dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
+     wait_time = min (wait_time, (uint32_t)(sample.elapsed_between_gcs / 1000 / 3));
+     wait_time = max (wait_time, 1);
+
+     dprintf (6666, ("gc#0 thread waiting for %d ms (between GCs %I64d)", wait_time, sample.elapsed_between_gcs));
+ }
+#endif //DYNAMIC_HEAP_COUNT
+ uint32_t wait_result = gc_heap::ee_suspend_event.Wait(wait_on_time_out_p ? wait_time : INFINITE, FALSE);
+ dprintf (9999, ("waiting for ee done res %d (timeout %d, %I64d ms since last suspend end)(should_change_heap_count is %d) (gradual_decommit_in_progress_p %d)",
+     wait_result, wait_time, ((GetHighPrecisionTimeStamp() - last_suspended_end_time) / 1000),
+     dynamic_heap_count_data.should_change_heap_count, gradual_decommit_in_progress_p));
 if (wait_result == WAIT_TIMEOUT)
 {
- decommit_lock.Enter();
- gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS);
- decommit_lock.Leave();
+#ifdef DYNAMIC_HEAP_COUNT
+ if (dynamic_heap_count_data.should_change_heap_count)
+ {
+#ifdef BACKGROUND_GC
+ if (!gc_heap::background_running_p ())
+#endif //BACKGROUND_GC
+ {
+     dprintf (6666, ("changing heap count due to timeout"));
+     check_heap_count();
+ }
+ }
+#endif //DYNAMIC_HEAP_COUNT
+
+ if (gradual_decommit_in_progress_p)
+ {
+     decommit_lock.Enter ();
+     gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS);
+     decommit_lock.Leave ();
+ }
 continue;
 }
+#ifdef DYNAMIC_HEAP_COUNT
+ // We might want to consider also doing this when a BGC finishes.
+ if (dynamic_heap_count_data.should_change_heap_count)
+ {
+#ifdef BACKGROUND_GC
+ if (!gc_heap::background_running_p ())
+#endif //BACKGROUND_GC
+ {
+     // this was a request to do a GC, so make sure we follow through with one.
+     dprintf (6666, ("changing heap count at a GC start"));
+     check_heap_count ();
+ }
+ }
+
+ // wait until the threads that should have gone idle have at least reached the point where they are about
+ // to wait on the idle event (see the spin-wait sketch below).
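Reviewer note: the `spin_and_wait` macro introduced earlier in this file expands to a spin-then-yield loop. Here is a rough function-style equivalent, for readability only; `YieldProcessor` and `GCToOSInterface::YieldThread` are the same primitives the macro itself uses, and `spin_and_wait_sketch` is a hypothetical name:

```cpp
// Illustrative function-style equivalent of the spin_and_wait macro.
// pred is re-evaluated on every iteration, mirroring how the macro re-expands expr.
template <typename Pred>
void spin_and_wait_sketch (int count_to_spin, Pred pred)
{
    while (!pred ())
    {
        // spin for a bounded number of iterations first, in case the
        // condition becomes true very soon
        for (int j = 0; j < count_to_spin; j++)
        {
            if (pred ())
                break;
            YieldProcessor ();                     // CPU-level pause/yield hint
        }

        // still not satisfied - give up the rest of this time slice so the
        // threads we are waiting on get a chance to run
        if (!pred ())
            GCToOSInterface::YieldThread (0);
    }
}
```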
+ if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && + (n_heaps != dynamic_heap_count_data.last_n_heaps)) + { + int spin_count = 1024; + int idle_thread_count = n_max_heaps - n_heaps; + dprintf (9999, ("heap count changed %d->%d, idle should be %d and is %d", dynamic_heap_count_data.last_n_heaps, n_heaps, + idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + if (idle_thread_count != dynamic_heap_count_data.idle_thread_count) + { + spin_and_wait (spin_count, (idle_thread_count == dynamic_heap_count_data.idle_thread_count)); + dprintf (9999, ("heap count changed %d->%d, now idle is %d", dynamic_heap_count_data.last_n_heaps, n_heaps, + VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + } + + dynamic_heap_count_data.last_n_heaps = n_heaps; + } +#endif //DYNAMIC_HEAP_COUNT + suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); + dprintf (9999, ("h0 suspending EE in GC!")); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); + dprintf (9999, ("h0 suspended EE in GC!")); END_TIMING(suspend_ee_during_log); proceed_with_gc_p = TRUE; @@ -7067,46 +7153,74 @@ void gc_heap::gc_thread_function () { settings.init_mechanisms(); #ifdef DYNAMIC_HEAP_COUNT - // make sure the other gc threads cannot see this as a request to change heap count - // see explanation below about the cases when we return from gc_start_event.Wait - assert (dynamic_heap_count_data.new_n_heaps == n_heaps); + if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + // make sure the other gc threads cannot see this as a request to change heap count + // see explanation below about the cases when we return from gc_start_event.Wait + assert (dynamic_heap_count_data.new_n_heaps == n_heaps); + } #endif //DYNAMIC_HEAP_COUNT + dprintf (9999, ("GC thread %d setting_gc_start_in_gc(h%d)", heap_number, n_heaps)); gc_start_event.Set(); } dprintf (3, (ThreadStressLog::gcServerThread0StartMsg(), heap_number)); } else { + dprintf (9999, ("GC thread %d waiting_for_gc_start(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier(&settings.gc_index))); gc_start_event.Wait(INFINITE, FALSE); #ifdef DYNAMIC_HEAP_COUNT - // we have a couple different cases to handle here when we come back from the wait: - // 1. We are starting a GC. Signaled by dynamic_heap_count_data.new_n_heaps == n_heaps - // a) We are starting a GC, but this thread is idle. Signaled by n_heaps <= heap_number - // b) We are starting a GC, and this thread is participating. Signaled by heap_number < n_heaps - // 2. We are changing heap count. Signaled by dynamic_heap_count_data.new_n_heaps != n_heaps - // a) We are changing heap count, but this thread is idle. Signaled by n_heaps <= heap_number. - // b) We are changing heap count, and this thread is participating. Signaled by heap_number < n_heaps. 
- - // check for 1.a) and 2.a) cases above - if (n_heaps <= heap_number) - { - dprintf (2, ("GC thread %d idle", heap_number)); - - // make sure GC is complete so we know the gc_idle_thread_event has been reset - g_theGCHeap->WaitUntilGCComplete(); + dprintf (9999, ("GC thread %d waiting_done_gc_start(%d-%d)(i: %d)(gc%Id)", + heap_number, n_heaps, dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.init_only_p, VolatileLoadWithoutBarrier (&settings.gc_index))); + + if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && + (dynamic_heap_count_data.new_n_heaps != n_heaps)) + { + // The reason why we need to do this is - + // + for threads that were participating, we need them to do work for change_heap_count + // + for threads that were not participating but will need to participate, we need to make sure they are woken now instead of + // randomly sometime later. + int old_n_heaps = n_heaps; + int new_n_heaps = dynamic_heap_count_data.new_n_heaps; + int num_threads_to_wake = max (new_n_heaps, old_n_heaps); + if (heap_number < num_threads_to_wake) + { + dprintf (9999, ("h%d < %d, calling change", heap_number, num_threads_to_wake)); + change_heap_count (dynamic_heap_count_data.new_n_heaps); + if (new_n_heaps < old_n_heaps) + { + dprintf (9999, ("h%d after change", heap_number)); + // at the end of change_heap_count we've changed join's heap count to the new one if it's smaller. So we need to make sure + // only that many threads will participate in the following GCs. + if (heap_number < new_n_heaps) + { + dprintf (9999, ("h%d < %d participating (dec)", heap_number, new_n_heaps)); + } + else + { + Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count); + dprintf (9999, ("GC thread %d wait_on_idle(%d < %d)(gc%Id), total idle %d", heap_number, old_n_heaps, new_n_heaps, + VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + gc_idle_thread_event.Wait (INFINITE, FALSE); + dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id) after doing change", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + } + } + else + { + dprintf (9999, ("h%d < %d participating (inc)", heap_number, new_n_heaps)); + } + } + else + { + Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count); + dprintf (9999, ("GC thread %d wait_on_idle(< max %d)(gc%Id), total idle %d", heap_number, num_threads_to_wake, + VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + gc_idle_thread_event.Wait (INFINITE, FALSE); + dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + } - // now wait on the gc_idle_thread_event - gc_idle_thread_event.Wait(INFINITE, FALSE); - dprintf (2, ("GC thread %d waking from idle", heap_number)); - continue; - } - // case 2.b) above: is this a request to change heap count? - if (dynamic_heap_count_data.new_n_heaps != n_heaps) - { - change_heap_count (dynamic_heap_count_data.new_n_heaps); continue; } - // case 1.b) above: we're starting a GC. 
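Reviewer note: the participation logic above boils down to a park/wake handshake: a surplus GC thread increments `idle_thread_count` and blocks on its per-heap auto-reset `gc_idle_thread_event`, and heap 0 later wakes exactly the threads it needs. A condensed sketch of that pattern follows; the std:: primitives and the `idle_gate` name are stand-ins for the GCEvent/Interlocked machinery in gc.cpp:

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>

struct idle_gate
{
    std::atomic<int> idle_count{0};
    std::mutex m;
    std::condition_variable cv;
    bool wake = false;

    // a surplus GC thread parks itself and advertises that it is idle,
    // so heap 0 can spin-wait until the expected number of threads parked
    void park ()
    {
        idle_count.fetch_add (1);
        std::unique_lock<std::mutex> lk (m);
        cv.wait (lk, [&] { return wake; });
        idle_count.fetch_sub (1);
    }

    // heap 0 wakes parked threads once the heap count grows again
    // (gc.cpp instead Sets one auto-reset event per heap being brought back)
    void wake_all ()
    {
        { std::lock_guard<std::mutex> lk (m); wake = true; }
        cv.notify_all ();
    }
};
```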
#endif //DYNAMIC_HEAP_COUNT dprintf (3, (ThreadStressLog::gcServerThreadNStartMsg(), heap_number)); } @@ -7191,10 +7305,6 @@ void gc_heap::gc_thread_function () { gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); } -#ifdef DYNAMIC_HEAP_COUNT - // check if we should adjust the number of heaps - check_heap_count(); -#endif //DYNAMIC_HEAP_COUNT } else { @@ -12527,6 +12637,16 @@ void gc_heap::rearrange_uoh_segments() freeable_uoh_segment = 0; } +void gc_heap::delay_free_segments() +{ + rearrange_uoh_segments(); +#ifdef BACKGROUND_GC + background_delay_delete_uoh_segments(); + if (!gc_heap::background_running_p()) + rearrange_small_heap_segments(); +#endif //BACKGROUND_GC +} + #ifndef USE_REGIONS void gc_heap::rearrange_heap_segments(BOOL compacting) { @@ -14860,6 +14980,25 @@ gc_heap::init_gc_heap (int h_number) gc_done_event_lock = -1; gc_done_event_set = false; +#ifdef DYNAMIC_HEAP_COUNT + if (h_number != 0) + { + if (!gc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) + { + return 0; + } + +#ifdef BACKGROUND_GC + if (!bgc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) + { + return 0; + } +#endif //BACKGROUND_GC + + dprintf (9999, ("creating idle events for h%d", h_number)); + } +#endif //DYNAMIC_HEAP_COUNT + if (!init_dynamic_data()) { return 0; @@ -16038,7 +16177,6 @@ void min_fl_list_info::thread_item_no_prev (uint8_t* item) tail = item; } -// This is only implemented for gen2 right now!!!! // the min_fl_list array is arranged as chunks of n_heaps min_fl_list_info, the 1st chunk corresponds to the 1st bucket, // and so on. void allocator::rethread_items (size_t* num_total_fl_items, size_t* num_total_fl_items_rethreaded, gc_heap* current_heap, @@ -17406,6 +17544,7 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, gen_number, align_const); dd_new_allocation (dynamic_data_of (gen_number)) -= limit; + size_t saved_free_list_size = free_list_size; #ifdef FEATURE_LOH_COMPACTION if (loh_pad) { @@ -17434,7 +17573,7 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, { generation_free_obj_space (gen) += remain_size; } - generation_free_list_space (gen) -= free_list_size; + generation_free_list_space (gen) -= saved_free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); generation_free_list_allocated (gen) += limit; @@ -22000,11 +22139,70 @@ BOOL gc_heap::should_proceed_with_gc() void gc_heap::update_end_gc_time_per_heap() { +#ifdef DYNAMIC_HEAP_COUNT + size_t prev_gen2_end_time = 0; + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (settings.condemned_generation == max_generation)) + { + dynamic_data* dd = dynamic_data_of (max_generation); + prev_gen2_end_time = dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd);; + } +#endif //DYNAMIC_HEAP_COUNT + for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++) { dynamic_data* dd = dynamic_data_of (gen_number); + + if (heap_number == 0) + { + dprintf (6666, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d", + gen_number, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)))); + } + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); + + if (heap_number == 0) + { + dprintf (6666, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd))); + } + } + +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == 
dynamic_adaptation_to_application_sizes))
+ {
+     dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
+     sample.elapsed_between_gcs = end_gc_time - last_suspended_end_time;
+     sample.gc_pause_time = dd_gc_elapsed_time (dynamic_data_of (0));
+     sample.msl_wait_time = get_msl_wait_time();
+
+     dprintf (6666, ("sample#%d: this GC end %I64d - last sus end %I64d = %I64d, this GC pause %I64d, msl wait %I64d",
+         dynamic_heap_count_data.sample_index, end_gc_time, last_suspended_end_time, sample.elapsed_between_gcs, sample.gc_pause_time, sample.msl_wait_time));
+
+     last_suspended_end_time = end_gc_time;
+
+     GCEventFireHeapCountSample_V1 (
+         (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index),
+         sample.elapsed_between_gcs,
+         sample.gc_pause_time,
+         sample.msl_wait_time);
+
+     dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+
+     if (settings.condemned_generation == max_generation)
+     {
+         gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0));
+         size_t elapsed_between_gen2_gcs = end_gc_time - prev_gen2_end_time;
+         size_t gen2_elapsed_time = sample.gc_pause_time;
+         dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs;
+
+         dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f",
+             dynamic_heap_count_data.gen2_sample_index, end_gc_time, prev_gen2_end_time, elapsed_between_gen2_gcs,
+             gen2_elapsed_time, dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index]));
+         dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+     }
+
+     calculate_new_heap_count ();
+ }
+#endif //DYNAMIC_HEAP_COUNT
 }

 void gc_heap::update_end_ngc_time()
@@ -22151,7 +22349,31 @@ void gc_heap::gc1()
 {
 dynamic_data* dd = dynamic_data_of (n);
 end_gc_time = GetHighPrecisionTimeStamp();
+ size_t time_since_last_gen2 = 0;
+
+#ifdef DYNAMIC_HEAP_COUNT
+ if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes))
+ {
+     time_since_last_gen2 = (size_t)(end_gc_time - (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)));
+     dprintf (6666, ("BGC %Id end %I64d - (prev gen2 start %I64d + elapsed %Id = %I64d) = time in between gen2 %Id",
+         dd_gc_clock (dd), end_gc_time, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)), time_since_last_gen2));
+ }
+#endif //DYNAMIC_HEAP_COUNT
+
 dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd));
+#ifdef DYNAMIC_HEAP_COUNT
+ if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes))
+ {
+     dprintf (6666, ("updating BGC %Id elapsed time to %I64d - %I64d = %I64d", dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));
+
+     float bgc_percent = (float)dd_gc_elapsed_time (dd) * 100.0f / (float)time_since_last_gen2;
+     dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = bgc_percent;
+     dprintf (6666, ("gen2 sample %d elapsed %Id * 100 / time in between gen2 %Id = %.3f",
+         dynamic_heap_count_data.gen2_sample_index, dd_gc_elapsed_time (dd), time_since_last_gen2, bgc_percent));
+     dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+     gc_index_full_gc_end =
dd_gc_clock (dynamic_data_of (0));
+ }
+#endif //DYNAMIC_HEAP_COUNT

 #ifdef HEAP_BALANCE_INSTRUMENTATION
 if (heap_number == 0)
@@ -22758,7 +22980,12 @@ void gc_heap::merge_fl_from_other_heaps (int gen_idx, int to_n_heaps, int from_n
 assert (free_list_space_decrease <= generation_free_list_space (gen));
 generation_free_list_space (gen) -= free_list_space_decrease;

- assert (free_list_space_decrease <= dd_fragmentation (dd));
+ // TODO - I'm seeing that for gen2, free_list_space_decrease can be a bit larger than frag.
+ // Need to fix this later.
+ if (gen_idx != max_generation)
+ {
+     assert (free_list_space_decrease <= dd_fragmentation (dd));
+ }

 size_t free_list_space_increase = 0;
 for (int from_hn = 0; from_hn < from_n_heaps; from_hn++)
@@ -23733,9 +23960,6 @@ void gc_heap::garbage_collect (int n)
 #ifdef MULTIPLE_HEAPS
 gc_start_event.Reset();
-#ifdef DYNAMIC_HEAP_COUNT
- gc_idle_thread_event.Reset();
-#endif //DYNAMIC_HEAP_COUNT
 gc_t_join.restart();
 #endif //MULTIPLE_HEAPS
 }
@@ -23757,6 +23981,9 @@
 #endif // STRESS_HEAP

 #ifdef MULTIPLE_HEAPS
+#ifdef STRESS_DYNAMIC_HEAP_COUNT
+ Interlocked::Increment (&heaps_in_this_gc);
+#endif //STRESS_DYNAMIC_HEAP_COUNT
 //align all heaps on the max generation to condemn
 dprintf (3, ("Joining for max generation to condemn"));
 condemned_generation_num = generation_to_condemn (n,
@@ -23772,30 +23999,31 @@
 #endif //FEATURE_BASICFREEZE

 #ifdef MULTIPLE_HEAPS
+#ifdef STRESS_DYNAMIC_HEAP_COUNT
+ dprintf (9999, ("%d heaps, join sees %d, actually joined %d, %d idle threads (%d)",
+     n_heaps, gc_t_join.get_num_threads (), heaps_in_this_gc,
+     VolatileLoadWithoutBarrier(&dynamic_heap_count_data.idle_thread_count), (n_max_heaps - n_heaps)));
+ if (heaps_in_this_gc != n_heaps)
+ {
+     dprintf (9999, ("should have %d heaps but actually have %d!!", n_heaps, heaps_in_this_gc));
+     GCToOSInterface::DebugBreak ();
+ }
+
+ heaps_in_this_gc = 0;
+#endif //STRESS_DYNAMIC_HEAP_COUNT
+
 for (int i = 0; i < n_heaps; i++)
 {
 gc_heap* hp = g_heaps[i];
 // check for card table growth
 if (g_gc_card_table != hp->card_table)
 hp->copy_brick_card_table();
-
- hp->rearrange_uoh_segments();
-#ifdef BACKGROUND_GC
- hp->background_delay_delete_uoh_segments();
- if (!gc_heap::background_running_p())
- hp->rearrange_small_heap_segments();
-#endif //BACKGROUND_GC
+ hp->delay_free_segments();
 }
 #else //MULTIPLE_HEAPS
 if (g_gc_card_table != card_table)
 copy_brick_card_table();
-
- rearrange_uoh_segments();
-#ifdef BACKGROUND_GC
- background_delay_delete_uoh_segments();
- if (!gc_heap::background_running_p())
- rearrange_small_heap_segments();
-#endif //BACKGROUND_GC
+ delay_free_segments();
 #endif //MULTIPLE_HEAPS

 BOOL should_evaluate_elevation = TRUE;
@@ -23882,10 +24110,8 @@
 do_pre_gc();

 #ifdef MULTIPLE_HEAPS
+ dprintf (9999, ("in GC, resetting gc_start"));
 gc_start_event.Reset();
-#ifdef DYNAMIC_HEAP_COUNT
- gc_idle_thread_event.Reset();
-#endif //DYNAMIC_HEAP_COUNT
 dprintf(3, ("Starting all gc threads for gc"));
 gc_t_join.restart();
 #endif //MULTIPLE_HEAPS
@@ -24341,7 +24567,7 @@ void gc_heap::equalize_promoted_bytes(int condemned_gen_number)
 // hope is to achieve better work balancing in relocate and compact phases
 // this is also used when the heap count changes to balance regions between heaps
 int highest_gen_number = ((condemned_gen_number == max_generation) ?
- (total_generation_count - 1) : condemned_gen_number); + (total_generation_count - 1) : condemned_gen_number); int stop_gen_idx = get_stop_generation_index (condemned_gen_number); for (int gen_idx = highest_gen_number; gen_idx >= stop_gen_idx; gen_idx--) @@ -25050,285 +25276,332 @@ void gc_heap::recommission_heap() #endif //RECORD_LOH_STATE } -void gc_heap::check_heap_count () +float median_of_3 (float a, float b, float c) { - dynamic_heap_count_data.new_n_heaps = n_heaps; +#define compare_and_swap(i, j) \ + { \ + if (i < j) \ + { \ + float t = i; \ + i = j; \ + j = t; \ + } \ + } + compare_and_swap (b, a); + compare_and_swap (c, a); + compare_and_swap (c, b); +#undef compare_and_swap + return b; +} - if (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes) +size_t gc_heap::get_num_completed_gcs () +{ + size_t num_completed_gcs = settings.gc_index; +#ifdef BACKGROUND_GC + if (g_heaps[0]->is_bgc_in_progress ()) { - return; + num_completed_gcs--; + dprintf (6666, ("BGC in prog, completed GCs -> %Id", num_completed_gcs)); } +#endif //BACKGROUND_GC - // we should be calling this only on the main GC thread - assert (heap_number == 0); + return num_completed_gcs; +} - // acquire data for the current sample - uint64_t soh_msl_wait_time = 0; - uint64_t uoh_msl_wait_time = 0; - size_t allocating_thread_count = 0; - size_t heap_size = 0; - for (int i = 0; i < n_heaps; i++) +int gc_heap::calculate_new_heap_count () +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + size_t num_completed_gcs = get_num_completed_gcs (); + + dprintf (6666, ("current GC %Id(completed: %Id), prev completed GCs %Id, last full GC happened at index %Id", + VolatileLoadWithoutBarrier (&settings.gc_index), num_completed_gcs, dynamic_heap_count_data.prev_num_completed_gcs, gc_index_full_gc_end)); + + if (num_completed_gcs < (dynamic_heap_count_data.prev_num_completed_gcs + dynamic_heap_count_data_t::sample_size)) { - gc_heap* hp = g_heaps[i]; + dprintf (6666, ("not enough GCs, skipping")); + return n_heaps; + } - allocating_thread_count += hp->alloc_contexts_used; + float median_gen2_tcp_percent = 0.0f; + if (gc_index_full_gc_end >= (settings.gc_index - dynamic_heap_count_data_t::sample_size)) + { + median_gen2_tcp_percent = dynamic_heap_count_data.get_median_gen2_gc_percent (); + } - soh_msl_wait_time += hp->more_space_lock_soh.msl_wait_time; - hp->more_space_lock_soh.msl_wait_time = 0; - hp->more_space_lock_soh.msl_wait_count = 0; + // If there was a blocking gen2 GC, the overhead would be very large and most likely we would not pick it. So we + // rely on the gen2 sample's overhead calculated above. + float throughput_cost_percents[dynamic_heap_count_data_t::sample_size]; + for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) + { + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; + throughput_cost_percents[i] = (sample.elapsed_between_gcs ? 
(((float)sample.msl_wait_time / n_heaps + sample.gc_pause_time) * 100.0f / (float)sample.elapsed_between_gcs) : 0.0f); + assert (throughput_cost_percents[i] >= 0.0); + if (throughput_cost_percents[i] > 100.0) + throughput_cost_percents[i] = 100.0; + dprintf (6666, ("sample %d: msl %I64d / %d + pause %I64d / elapsed %I64d = throughput_cost_percent: %.3f", i, + sample.msl_wait_time, n_heaps, sample.gc_pause_time, sample.elapsed_between_gcs, throughput_cost_percents[i])); + } - uoh_msl_wait_time += hp->more_space_lock_uoh.msl_wait_time; - hp->more_space_lock_uoh.msl_wait_time = 0; - hp->more_space_lock_uoh.msl_wait_count = 0; + float median_throughput_cost_percent = median_of_3 (throughput_cost_percents[0], throughput_cost_percents[1], throughput_cost_percents[2]); + + // apply exponential smoothing and use 1/3 for the smoothing factor + const float smoothing = 3; + float smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent; + if (smoothed_median_throughput_cost_percent != 0.0f) + { + // average it with the previous value + smoothed_median_throughput_cost_percent = median_throughput_cost_percent / smoothing + (smoothed_median_throughput_cost_percent / smoothing) * (smoothing - 1); + } + else + { + smoothed_median_throughput_cost_percent = median_throughput_cost_percent; + } + + dprintf (6666, ("median tcp: %.3f, smoothed tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)", + median_throughput_cost_percent, smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + dynamic_heap_count_data.gen2_gc_percents[0], dynamic_heap_count_data.gen2_gc_percents[1], dynamic_heap_count_data.gen2_gc_percents[2])); + + size_t heap_size = 0; + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { dynamic_data* dd = hp->dynamic_data_of (gen_idx); // estimate the size of each generation as the live data size plus the budget - heap_size += dd_promoted_size (dd) + dd_desired_allocation (dd); - dprintf (6666, ("h%d g%d promoted: %zd desired allocation: %zd", i, gen_idx, dd_promoted_size (dd), dd_desired_allocation (dd))); + heap_size += dd_current_size (dd) + dd_desired_allocation (dd); + dprintf (3, ("h%d g%d current: %zd desired allocation: %zd", i, gen_idx, dd_promoted_size (dd), dd_desired_allocation (dd))); } } - dynamic_data* hp0_dd0 = g_heaps[0]->dynamic_data_of (0); + // estimate the space cost of adding a heap as the min gen0 budget + size_t heap_space_cost_per_heap = dd_min_size (g_heaps[0]->dynamic_data_of (0)); - // persist data for the current sample - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + // compute the % space cost of adding a heap + float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps; - sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps; - sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0); - sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0); - sample.allocating_thread_count = allocating_thread_count; - sample.heap_size = heap_size; + // compute reasonable step sizes for the heap count + // + // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
+ // we don't go all the way to the number of CPUs, but stay 1 or 2 short
+ int step_up = (n_heaps + 1) / 2;
+ int extra_heaps = 1 + (n_max_heaps >= 32);
+ step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps);

- sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps;
- sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps;
- sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0);
- sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0);
- sample.allocating_thread_count = allocating_thread_count;
- sample.heap_size = heap_size;
+ // on the way down, we essentially divide the heap count by 1.5
+ int step_down = (n_heaps + 1) / 3;

- dprintf (6666, ("sample %d: soh_msl_wait_time: %zd, uoh_msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB",
- dynamic_heap_count_data.sample_index,
- sample.soh_msl_wait_time,
- sample.uoh_msl_wait_time,
- sample.elapsed_between_gcs,
- sample.gc_elapsed_time,
- sample.heap_size/(1024*1024)));
+ // estimate the potential time benefit of going up a step
+ float tcp_reduction_per_step_up = smoothed_median_throughput_cost_percent * step_up / (n_heaps + step_up);

- dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+ // estimate the potential time cost of going down a step
+ float tcp_increase_per_step_down = smoothed_median_throughput_cost_percent * step_down / (n_heaps - step_down);

- GCEventFireHeapCountSample_V1(
- sample.gc_elapsed_time,
- sample.soh_msl_wait_time,
- sample.uoh_msl_wait_time,
- sample.elapsed_between_gcs
- );
+ // estimate the potential space cost of going up a step
+ float scp_increase_per_step_up = percent_heap_space_cost_per_heap * step_up;

- if (settings.gc_index < prev_change_heap_count_gc_index + 3)
+ // estimate the potential space saving of going down a step
+ float scp_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down;
+
+ dprintf (6666, ("[CHP] u %d, d %d | space cost %Id / heap %Id(%.2fmb) = scp %.3f (u: %.3f, d: %.3f) | stcp %.3f, u * %.1f = %.3f, d * %.1f = %.3f",
+     step_up, step_down,
+     heap_space_cost_per_heap, heap_size, ((float)heap_size / (float)1000 / (float)1000), percent_heap_space_cost_per_heap,
+     scp_increase_per_step_up, scp_decrease_per_step_down,
+     smoothed_median_throughput_cost_percent,
+     ((float)step_up / (float)(n_heaps + step_up)), tcp_reduction_per_step_up,
+     ((float)step_down / (float)(n_heaps - step_down)), tcp_increase_per_step_down));
+
+#ifdef STRESS_DYNAMIC_HEAP_COUNT
+ // quick hack for initial testing
+ int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1;
+
+ // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap
+ if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1))
 {
- // reconsider the decision every few gcs
- return;
+ new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps);
+ new_n_heaps = max (new_n_heaps, 1);
 }
-
- if (gc_heap::background_running_p())
+ dprintf (6666, ("stress %d -> %d", n_heaps, new_n_heaps));
+#else //STRESS_DYNAMIC_HEAP_COUNT
+ int new_n_heaps = n_heaps;
+ if (median_throughput_cost_percent > 10.0f)
 {
- // can't have background gc running while we change the number of heaps
- // so it's useless to compute a new number of heaps here
+ // ramp up more aggressively - use as many heaps as it would take to bring
+ // the tcp down to 5%
+ new_n_heaps = (int)(n_heaps * (median_throughput_cost_percent / 5.0));
+ dprintf (6666, ("[CHP0] tcp %.3f -> %d * %.3f = %d", median_throughput_cost_percent, n_heaps, (median_throughput_cost_percent / 5.0), new_n_heaps));
+ new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps);
 }
- else
+ // if the median tcp is 10% or less, react slower
+ else if ((smoothed_median_throughput_cost_percent > 5.0f) || (median_gen2_tcp_percent >
10.0f)) { - // compute the % overhead from msl waiting time and gc time for each of the samples - float percent_overhead[dynamic_heap_count_data_t::sample_size]; - for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) - { - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; - uint64_t overhead_time = sample.soh_msl_wait_time + sample.uoh_msl_wait_time + sample.gc_elapsed_time; - percent_overhead[i] = overhead_time * 100.0f / sample.elapsed_between_gcs; - if (percent_overhead[i] < 0) - percent_overhead[i] = 0; - else if (percent_overhead[i] > 100) - percent_overhead[i] = 100; - dprintf (6666, ("sample %d: percent_overhead: %d%%", i, (int)percent_overhead[i])); - } - // compute the median of the percent overhead samples - #define compare_and_swap(i, j) \ - { \ - if (percent_overhead[i] < percent_overhead[j]) \ - { \ - float t = percent_overhead[i]; \ - percent_overhead[i] = percent_overhead[j]; \ - percent_overhead[j] = t; \ - } \ - } - compare_and_swap (1, 0); - compare_and_swap (2, 0); - compare_and_swap (2, 1); - #undef compare_and_swap - - // the middle element is the median overhead percentage - float median_percent_overhead = percent_overhead[1]; - - // apply exponential smoothing and use 1/3 for the smoothing factor - const float smoothing = 3; - float smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead; - if (smoothed_median_percent_overhead != 0.0f) - { - // average it with the previous value - smoothed_median_percent_overhead = median_percent_overhead / smoothing + (smoothed_median_percent_overhead / smoothing) * (smoothing - 1); + if (smoothed_median_throughput_cost_percent > 5.0f) + { + dprintf (6666, ("[CHP1] stcp %.3f > 5, %d + %d = %d", smoothed_median_throughput_cost_percent, n_heaps, step_up, (n_heaps + step_up))); } else { - // first time? 
initialize to the median - smoothed_median_percent_overhead = median_percent_overhead; + dprintf (6666, ("[CHP2] tcp %.3f > 10, %d + %d = %d", median_gen2_tcp_percent, n_heaps, step_up, (n_heaps + step_up))); } + new_n_heaps += step_up; + } + // if we can save at least 1% more in time than we spend in space, increase number of heaps + else if ((tcp_reduction_per_step_up - scp_increase_per_step_up) >= 1.0f) + { + dprintf (6666, ("[CHP3] % .3f - % .3f = % .3f, % d + % d = % d", + tcp_reduction_per_step_up, scp_increase_per_step_up, (tcp_reduction_per_step_up - scp_increase_per_step_up), + n_heaps, step_up, (n_heaps + step_up))); + new_n_heaps += step_up; + } + // if we can save at least 1% more in space than we spend in time, decrease number of heaps + else if ((smoothed_median_throughput_cost_percent < 1.0f) && + (median_gen2_tcp_percent < 5.0f) && + ((scp_decrease_per_step_down - tcp_increase_per_step_down) >= 1.0f)) + { + dprintf (6666, ("[CHP4] stcp %.3f tcp %.3f, %.3f - %.3f = %.3f, %d + %d = %d", + smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + scp_decrease_per_step_down, tcp_increase_per_step_down, (scp_decrease_per_step_down - tcp_increase_per_step_down), + n_heaps, step_up, (n_heaps + step_up))); + new_n_heaps -= step_down; + } - dprintf (6666, ("median overhead: %d%% smoothed median overhead: %d%%", (int)(median_percent_overhead*1000), (int)(smoothed_median_percent_overhead*1000))); - - // estimate the space cost of adding a heap as the min gen0 size - size_t heap_space_cost_per_heap = dd_min_size (hp0_dd0); - - // compute the % space cost of adding a heap - float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - - // compute reasonable step sizes for the heap count + assert (new_n_heaps >= 1); + assert (new_n_heaps <= n_max_heaps); +#endif //STRESS_DYNAMIC_HEAP_COUNT - // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
- // we don't go all the way to the number of CPUs, but stay 1 or 2 short - int step_up = (n_heaps + 1) / 2; - int extra_heaps = 1 + (n_max_heaps >= 32); - step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps); + // store data used for decision to emit in ETW event + dynamic_heap_count_data.median_throughput_cost_percent = median_throughput_cost_percent; + dynamic_heap_count_data.smoothed_median_throughput_cost_percent = smoothed_median_throughput_cost_percent; + dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; + dynamic_heap_count_data.tcp_reduction_per_step_up = tcp_reduction_per_step_up; + dynamic_heap_count_data.tcp_increase_per_step_down = tcp_increase_per_step_down; + dynamic_heap_count_data.scp_increase_per_step_up = scp_increase_per_step_up; + dynamic_heap_count_data.scp_decrease_per_step_down = scp_decrease_per_step_down; + + GCEventFireHeapCountTuning_V1 ( + (uint16_t)dynamic_heap_count_data.new_n_heaps, + (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index), + dynamic_heap_count_data.median_throughput_cost_percent, + dynamic_heap_count_data.smoothed_median_throughput_cost_percent, + dynamic_heap_count_data.tcp_reduction_per_step_up, + dynamic_heap_count_data.tcp_increase_per_step_down, + dynamic_heap_count_data.scp_increase_per_step_up, + dynamic_heap_count_data.scp_decrease_per_step_down + ); - // on the way down, we essentially divide the heap count by 1.5 - int step_down = (n_heaps + 1) / 3; + dynamic_heap_count_data.prev_num_completed_gcs = num_completed_gcs; - // estimate the potential time benefit of going up a step - float overhead_reduction_per_step_up = smoothed_median_percent_overhead * step_up / (n_heaps + step_up); + if (new_n_heaps != n_heaps) + { + dprintf (6666, ("should change! %d->%d", n_heaps, new_n_heaps)); + dynamic_heap_count_data.heap_count_to_change_to = new_n_heaps; + dynamic_heap_count_data.should_change_heap_count = true; + } - // estimate the potential time cost of going down a step - float overhead_increase_per_step_down = smoothed_median_percent_overhead * step_down / (n_heaps - step_down); + return new_n_heaps; +} - // estimate the potential space cost of going up a step - float space_cost_increase_per_step_up = percent_heap_space_cost_per_heap * step_up; +void gc_heap::check_heap_count () +{ + dynamic_heap_count_data.new_n_heaps = dynamic_heap_count_data.heap_count_to_change_to; - // estimate the potential space saving of going down a step - float space_cost_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down; + assert (dynamic_heap_count_data.new_n_heaps != n_heaps); -#ifdef STRESS_DYNAMIC_HEAP_COUNT - // quick hack for initial testing - int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1; + if (dynamic_heap_count_data.new_n_heaps != n_heaps) + { + dprintf (9999, ("h0 suspending EE in check")); + // can't have threads allocating while we change the number of heaps + GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); + dprintf (9999, ("h0 suspended EE in check")); - // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap - if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1)) +#ifdef BACKGROUND_GC + if (gc_heap::background_running_p()) { - new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps); + // background GC is running - reset the new heap count + dynamic_heap_count_data.new_n_heaps = n_heaps; + dprintf (6666, ("can't change heap count! 
BGC in progress")); - // but not down to zero, obviously... - new_n_heaps = max (new_n_heaps, 1); - } -#else //STRESS_DYNAMIC_HEAP_COUNT - int new_n_heaps = n_heaps; - if (median_percent_overhead > 10.0f) - { - // ramp up more agressively - use as many heaps as it would take to bring - // the overhead down to 5% - new_n_heaps = (int)(n_heaps * (median_percent_overhead / 5.0)); - new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps); - } - // if the median overhead is 10% or less, react slower - else if (smoothed_median_percent_overhead > 5.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in time than we spend in space, increase number of heaps - else if (overhead_reduction_per_step_up - space_cost_increase_per_step_up >= 1.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in space than we spend in time, decrease number of heaps - else if (smoothed_median_percent_overhead < 1.0f && space_cost_decrease_per_step_down - overhead_increase_per_step_down >= 1.0f) - { - new_n_heaps -= step_down; + GCToEEInterface::RestartEE(TRUE); } +#endif //BACKGROUND_GC + } - dprintf (6666, ("or: %d, si: %d, sd: %d, oi: %d => %d -> %d", - (int)overhead_reduction_per_step_up, - (int)space_cost_increase_per_step_up, - (int)space_cost_decrease_per_step_down, - (int)overhead_increase_per_step_down, - n_heaps, - new_n_heaps)); - - assert (1 <= new_n_heaps); - assert (new_n_heaps <= n_max_heaps); -#endif //STRESS_DYNAMIC_HEAP_COUNT - - dynamic_heap_count_data.new_n_heaps = new_n_heaps; - - // store data used for decision to emit in ETW event - dynamic_heap_count_data.median_percent_overhead = median_percent_overhead; - dynamic_heap_count_data.smoothed_median_percent_overhead = smoothed_median_percent_overhead; - dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; - dynamic_heap_count_data.overhead_reduction_per_step_up = overhead_reduction_per_step_up; - dynamic_heap_count_data.overhead_increase_per_step_down = overhead_increase_per_step_down; - dynamic_heap_count_data.space_cost_increase_per_step_up = space_cost_increase_per_step_up; - dynamic_heap_count_data.space_cost_decrease_per_step_down = space_cost_decrease_per_step_down; - - GCEventFireHeapCountTuning_V1( - (uint16_t)dynamic_heap_count_data.new_n_heaps, - (uint64_t)VolatileLoad(&settings.gc_index), - dynamic_heap_count_data.median_percent_overhead, - dynamic_heap_count_data.smoothed_median_percent_overhead, - dynamic_heap_count_data.overhead_reduction_per_step_up, - dynamic_heap_count_data.overhead_increase_per_step_down, - dynamic_heap_count_data.space_cost_increase_per_step_up, - dynamic_heap_count_data.space_cost_decrease_per_step_down - ); - - if (new_n_heaps != n_heaps) + if (dynamic_heap_count_data.new_n_heaps != n_heaps) + { + dprintf (6666, ("prep to change from %d to %d", n_heaps, dynamic_heap_count_data.new_n_heaps)); + if (!prepare_to_change_heap_count (dynamic_heap_count_data.new_n_heaps)) { - // can't have threads allocating while we change the number of heaps - GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); - - if (gc_heap::background_running_p()) - { - // background GC is running - reset the new heap count - dynamic_heap_count_data.new_n_heaps = n_heaps; - - GCToEEInterface::RestartEE(TRUE); - } + // we don't have sufficient resources - reset the new heap count + dynamic_heap_count_data.new_n_heaps = n_heaps; } } if (dynamic_heap_count_data.new_n_heaps == n_heaps) { // heap count stays the same, no work to do - dprintf (6666, ("heap count stays 
the same, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, n_heaps));
+ dynamic_heap_count_data.prev_num_completed_gcs = get_num_completed_gcs ();
+ dynamic_heap_count_data.should_change_heap_count = false;

- // come back after 3 GCs to reconsider
- prev_change_heap_count_gc_index = settings.gc_index;
+ dprintf (6666, ("heap count stays the same %d, no work to do, set prev completed to %Id", dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.prev_num_completed_gcs));

 return;
 }

- if (GCScan::GetGcRuntimeStructuresValid())
+ int new_n_heaps = dynamic_heap_count_data.new_n_heaps;
+
+ assert (!(dynamic_heap_count_data.init_only_p));
+
 {
+ // At this point we are guaranteed to be able to change the heap count to the new one.
+ // Change the heap count for joins here because we will need to join new_n_heaps threads together.
+ dprintf (9999, ("changing join hp %d->%d", n_heaps, new_n_heaps));
+ int max_threads_to_wake = max (n_heaps, new_n_heaps);
+ gc_t_join.update_n_threads (max_threads_to_wake);
+
 // make sure the other gc threads cannot see this as a request to GC
 assert (dynamic_heap_count_data.new_n_heaps != n_heaps);
+
+ if (n_heaps < new_n_heaps)
+ {
+ int saved_idle_thread_count = dynamic_heap_count_data.idle_thread_count;
+ Interlocked::ExchangeAdd (&dynamic_heap_count_data.idle_thread_count, (n_heaps - new_n_heaps));
+ dprintf (9999, ("GC thread %d setting idle events for h%d-h%d, total idle %d -> %d", heap_number, n_heaps, (new_n_heaps - 1),
+     saved_idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count)));
+
+ for (int heap_idx = n_heaps; heap_idx < new_n_heaps; heap_idx++)
+ {
+ g_heaps[heap_idx]->gc_idle_thread_event.Set();
+#ifdef BACKGROUND_GC
+ g_heaps[heap_idx]->bgc_idle_thread_event.Set();
+#endif //BACKGROUND_GC
+ }
+ }
+
 gc_start_event.Set();
 }

 int old_n_heaps = n_heaps;
+ (dynamic_heap_count_data.heap_count_change_count)++;

 change_heap_count (dynamic_heap_count_data.new_n_heaps);

 GCToEEInterface::RestartEE(TRUE);
- prev_change_heap_count_gc_index = settings.gc_index;
+ dprintf (9999, ("h0 restarted EE"));

 // we made changes to the heap count that will change the overhead,
 // so change the smoothed overhead to reflect that
- int new_n_heaps = n_heaps;
- dynamic_heap_count_data.smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead/new_n_heaps*old_n_heaps;
+ dynamic_heap_count_data.smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent / n_heaps * old_n_heaps;
+
+ dprintf (6666, ("h0 finished changing, set should change to false!"));
+ dynamic_heap_count_data.should_change_heap_count = false;
 }

 bool gc_heap::prepare_to_change_heap_count (int new_n_heaps)
 {
- dprintf (6666, ("trying to change heap count %d -> %d", n_heaps, new_n_heaps));
+ dprintf (9999, ("trying to change heap count %d -> %d", n_heaps, new_n_heaps));

 // use this variable for clarity - n_heaps will change during the transition
 int old_n_heaps = n_heaps;
@@ -25371,6 +25644,17 @@ bool gc_heap::prepare_to_change_heap_count (int new_n_heaps)
 }
 }

+ // Before we look at whether we have sufficient regions, we should return regions that should be deleted to
+ // the free list so we don't lose them when we decommission heaps. We could do this for only the heaps we are about
+ // to decommission. 
But it's better to do this for all heaps because then we don't need to worry about adding them to the
+ // heaps that remain (freeable uoh/soh regions), and we get rid of regions with the heap_segment_flags_uoh_delete flag,
+ // because background_delay_delete_uoh_segments makes the assumption it can't be the start region.
+ for (int i = 0; i < old_n_heaps; i++)
+ {
+ gc_heap* hp = g_heaps[i];
+ hp->delay_free_segments ();
+ }
+
 // if we want to increase the number of heaps, we have to make sure we can give
 // each heap a region for each generation. If we cannot do that, we have to give up
 ptrdiff_t region_count_in_gen[total_generation_count];
@@ -25451,39 +25735,34 @@
 bool gc_heap::change_heap_count (int new_n_heaps)
 {
+ dprintf (9999, ("BEG heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));
+
 // use this variable for clarity - n_heaps will change during the transition
 int old_n_heaps = n_heaps;
+ bool init_only_p = dynamic_heap_count_data.init_only_p;

- if (heap_number == 0)
 {
- if (!prepare_to_change_heap_count (new_n_heaps))
- {
- // we don't have sufficient resources - reset the new heap count
- dynamic_heap_count_data.new_n_heaps = n_heaps;
- }
- }
-
- if (GCScan::GetGcRuntimeStructuresValid())
- {
- // join for sufficient resources decision
 gc_t_join.join (this, gc_join_merge_temp_fl);
 if (gc_t_join.joined ())
 {
+ // BGC is not running, we can safely change its join's heap count.
+#ifdef BACKGROUND_GC
+ bgc_t_join.update_n_threads (new_n_heaps);
+#endif //BACKGROUND_GC
+
+ dynamic_heap_count_data.init_only_p = false;
+ dprintf (9999, ("in change h%d resetting gc_start, update bgc join to %d heaps", heap_number, new_n_heaps));
 gc_start_event.Reset();
 gc_t_join.restart ();
 }
 }

- // gc_heap::n_heaps may have changed by now, compare to the snapshot *before* the join
- if (dynamic_heap_count_data.new_n_heaps == old_n_heaps)
- {
- dprintf (6666, ("failed to change heap count, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, old_n_heaps));
- return false;
- }
+ assert (dynamic_heap_count_data.new_n_heaps != old_n_heaps);
+
+ dprintf (9999, ("Waiting h0 heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));

 if (heap_number == 0)
 {
- // after having checked for sufficient resources, we are now committed to actually change the heap count
 dprintf (3, ("switching heap count from %d to %d heaps", old_n_heaps, new_n_heaps));

 // spread finalization data out to heaps coming into service
@@ -25504,17 +25783,23 @@
 from_heap_number = (from_heap_number + 1) % old_n_heaps;
 }

- // prepare for the switch by fixing the allocation contexts on the old heaps,
+ // prepare for the switch by fixing the allocation contexts on the old heaps, unifying the gen0_bricks_cleared flag,
 // and setting the survived size for the existing regions to their allocated size
+ BOOL unified_gen0_bricks_cleared = TRUE;
 for (int i = 0; i < old_n_heaps; i++)
 {
 gc_heap* hp = g_heaps[i];

- if (GCScan::GetGcRuntimeStructuresValid())
+ if (!init_only_p)
 {
 hp->fix_allocation_contexts (TRUE);
 }

+ if (unified_gen0_bricks_cleared && (hp->gen0_bricks_cleared == FALSE))
+ {
+ unified_gen0_bricks_cleared = FALSE;
+ }
+
 for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
 {
 generation* gen = hp->generation_of (gen_idx);
@@ -25614,7 +25899,7 @@
 hpd->free_regions[kind].transfer_regions(&hp->free_regions[kind]);
 }
 }
- // update number of heaps
+ dprintf
(9999, ("h%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); n_heaps = new_n_heaps; // even out the regions over the current number of heaps @@ -25625,6 +25910,8 @@ bool gc_heap::change_heap_count (int new_n_heaps) { gc_heap* hp = g_heaps[i]; + hp->gen0_bricks_cleared = unified_gen0_bricks_cleared; + // establish invariants regarding the ephemeral segment generation* gen0 = hp->generation_of (0); if ((hp->ephemeral_heap_segment == nullptr) || @@ -25653,7 +25940,9 @@ bool gc_heap::change_heap_count (int new_n_heaps) } } - if (GCScan::GetGcRuntimeStructuresValid()) + dprintf (3, ("individual heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); + + if (!init_only_p) { // join for rethreading the free lists gc_t_join.join (this, gc_join_merge_temp_fl); @@ -25665,7 +25954,11 @@ bool gc_heap::change_heap_count (int new_n_heaps) // rethread the free lists for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { - rethread_fl_items (gen_idx); + if (heap_number < old_n_heaps) + { + dprintf (3, ("h%d calling per heap work!", heap_number)); + rethread_fl_items (gen_idx); + } // join for merging the free lists gc_t_join.join (this, gc_join_merge_temp_fl); @@ -25676,18 +25969,14 @@ bool gc_heap::change_heap_count (int new_n_heaps) gc_t_join.restart (); } } +#ifdef BACKGROUND_GC // there should be no items in the bgc_alloc_lock bgc_alloc_lock->check(); +#endif //BACKGROUND_GC } if (heap_number == 0) { - // udate the number of heaps in the joins - gc_t_join.update_n_threads(new_n_heaps); - #ifdef BACKGROUND_GC - bgc_t_join.update_n_threads(new_n_heaps); - #endif //BACKGROUND_GC - // compute the total budget per generation over the old heaps // and figure out what the new budget per heap is ptrdiff_t budget_per_heap[total_generation_count]; @@ -25747,21 +26036,50 @@ bool gc_heap::change_heap_count (int new_n_heaps) hp->decommission_heap(); } - if (GCScan::GetGcRuntimeStructuresValid()) + if (!init_only_p) { // make sure no allocation contexts point to idle heaps fix_allocation_contexts_heaps(); } - if (old_n_heaps < new_n_heaps) + dynamic_heap_count_data.last_n_heaps = old_n_heaps; + } + + // join the last time to change the heap count again if needed. 
+ if (new_n_heaps < old_n_heaps) + { + gc_t_join.join (this, gc_join_merge_temp_fl); + if (gc_t_join.joined ()) { - // wake up threads for the new heaps - gc_idle_thread_event.Set(); + dprintf (9999, ("now changing the join heap count to the smaller one %d", new_n_heaps)); + gc_t_join.update_n_threads (new_n_heaps); + + gc_t_join.restart (); } } return true; } + +size_t gc_heap::get_msl_wait_time() +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + size_t msl_wait_since_pause = 0; + + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + + msl_wait_since_pause += hp->more_space_lock_soh.msl_wait_time; + hp->more_space_lock_soh.msl_wait_time = 0; + + msl_wait_since_pause += hp->more_space_lock_uoh.msl_wait_time; + hp->more_space_lock_uoh.msl_wait_time = 0; + } + + return msl_wait_since_pause; +} #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -32805,17 +33123,17 @@ void gc_heap::plan_phase (int condemned_gen_number) } else { - dprintf (2, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd", + dprintf (1, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd", end_seg_allocated, condemned_allocated, generation_condemned_allocated (generation_of (max_generation - 1)))); } - dprintf (1, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", + dprintf (2, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen), r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen))); - dprintf (1, ("this GC did %zd free list alloc(%zd bytes free space rejected)", + dprintf (2, ("this GC did %zd free list alloc(%zd bytes free space rejected)", free_list_allocated, rejected_free_space)); maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info); @@ -38908,9 +39226,9 @@ void gc_heap::bgc_thread_function() { // this is the case where we have more background GC threads than heaps // - wait until we're told to continue... 
- dprintf (3, ("BGC thread %d idle", heap_number)); - gc_idle_thread_event.Wait(INFINITE, FALSE); - dprintf (3, ("BGC thread %d waking from idle", heap_number)); + dprintf (9999, ("BGC thread %d idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + bgc_idle_thread_event.Wait(INFINITE, FALSE); + dprintf (9999, ("BGC thread %d waking from idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); continue; } #endif //DYNAMIC_HEAP_COUNT @@ -38982,7 +39300,7 @@ void gc_heap::bgc_thread_function() dprintf (SPINLOCK_LOG, ("bgc Lgc")); leave_spin_lock (&gc_lock); #ifdef MULTIPLE_HEAPS - dprintf(1, ("End of BGC - starting all BGC threads")); + dprintf(1, ("End of BGC")); bgc_t_join.restart(); #endif //MULTIPLE_HEAPS } @@ -42859,6 +43177,9 @@ bool gc_heap::init_dynamic_data() { process_start_time = now; smoothed_desired_total[0] = dynamic_data_of (0)->min_size * n_heaps; +#ifdef DYNAMIC_HEAP_COUNT + last_suspended_end_time = now; +#endif //DYNAMIC_HEAP_COUNT #ifdef HEAP_BALANCE_INSTRUMENTATION last_gc_end_time_us = now; dprintf (HEAP_BALANCE_LOG, ("qpf=%zd, start: %zd(%d)", qpf, start_raw_ts, now)); @@ -47957,6 +48278,7 @@ HRESULT GCHeap::Initialize() uint32_t nhp = 1; uint32_t nhp_from_config = 0; + uint32_t max_nhp_from_config = (uint32_t)GCConfig::GetMaxHeapCount(); #ifndef MULTIPLE_HEAPS GCConfig::SetServerGC(false); @@ -48151,6 +48473,10 @@ HRESULT GCHeap::Initialize() #ifdef MULTIPLE_HEAPS assert (nhp <= g_num_processors); + if (max_nhp_from_config) + { + nhp = min (nhp, max_nhp_from_config); + } gc_heap::n_max_heaps = nhp; gc_heap::n_heaps = nhp; hr = gc_heap::initialize_gc (seg_size, large_seg_size, pin_seg_size, nhp); @@ -48301,9 +48627,32 @@ HRESULT GCHeap::Initialize() { // start with only 1 heap gc_heap::smoothed_desired_total[0] /= gc_heap::n_heaps; - gc_heap::g_heaps[0]->change_heap_count (1); + int initial_n_heaps = 1; + dprintf (9999, ("gc_heap::n_heaps is %d, initial %d", gc_heap::n_heaps, initial_n_heaps)); + + { + if (!gc_heap::prepare_to_change_heap_count (initial_n_heaps)) + { + // we don't have sufficient resources. + return E_FAIL; + } + + gc_heap::dynamic_heap_count_data.new_n_heaps = initial_n_heaps; + gc_heap::dynamic_heap_count_data.idle_thread_count = 0; + gc_heap::dynamic_heap_count_data.init_only_p = true; + + int max_threads_to_wake = max (gc_heap::n_heaps, initial_n_heaps); + gc_t_join.update_n_threads (max_threads_to_wake); + gc_heap::gc_start_event.Set (); + } + + gc_heap::g_heaps[0]->change_heap_count (initial_n_heaps); + gc_heap::gc_start_event.Reset (); + + // This needs to be different from our initial heap count so we can make sure we wait for + // the idle threads correctly in gc_thread_function. + gc_heap::dynamic_heap_count_data.last_n_heaps = 0; } - gc_heap::dynamic_heap_count_data.new_n_heaps = gc_heap::n_heaps; #endif //DYNAMIC_HEAP_COUNT GCScan::GcRuntimeStructuresValid (TRUE); @@ -49875,10 +50224,16 @@ void gc_heap::do_post_gc() } #endif //BGC_SERVO_TUNING +#ifdef BACKGROUND_GC + const char* str_gc_type = (settings.concurrent ? "BGC" : (gc_heap::background_running_p () ? 
"FGC" : "NGC")); +#else + const char* str_gc_type = "NGC"; +#endif //BACKGROUND_GC + dprintf (1, (ThreadStressLog::gcDetailedEndMsg(), - VolatileLoad(&settings.gc_index), - dd_collection_count(hp->dynamic_data_of(0)), - (size_t)(GetHighPrecisionTimeStamp() / 1000), + VolatileLoad (&settings.gc_index), + dd_collection_count (hp->dynamic_data_of (0)), + (size_t)(GetHighPrecisionTimeStamp () / 1000), settings.condemned_generation, (settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC")), (settings.compaction ? "C" : "S"), diff --git a/src/coreclr/gc/gcconfig.h b/src/coreclr/gc/gcconfig.h index 72786778d5a978..aeded6bc97f17f 100644 --- a/src/coreclr/gc/gcconfig.h +++ b/src/coreclr/gc/gcconfig.h @@ -83,6 +83,7 @@ class GCConfigStringHolder INT_CONFIG (BGCSpinCount, "BGCSpinCount", NULL, 140, "Specifies the bgc spin count") \ INT_CONFIG (BGCSpin, "BGCSpin", NULL, 2, "Specifies the bgc spin time") \ INT_CONFIG (HeapCount, "GCHeapCount", "System.GC.HeapCount", 0, "Specifies the number of server GC heaps") \ + INT_CONFIG (MaxHeapCount, "GCMaxHeapCount", "System.GC.MaxHeapCount", 0, "Specifies the max number of server GC heaps to adjust to") \ INT_CONFIG (Gen0Size, "GCgen0size", NULL, 0, "Specifies the smallest gen0 budget") \ INT_CONFIG (SegmentSize, "GCSegmentSize", NULL, 0, "Specifies the managed heap segment size") \ INT_CONFIG (LatencyMode, "GCLatencyMode", NULL, -1, "Specifies the GC latency mode - batch, interactive or low latency (note that the same " \ diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 1a73add83b429f..cce6c5ee28adf0 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -402,8 +402,6 @@ struct GCDebugSpinLock { #if defined(DYNAMIC_HEAP_COUNT) // time in microseconds we wait for the more space lock uint64_t msl_wait_time; - // number of times we wait for the more space lock - uint64_t msl_wait_count; #endif //DYNAMIC_HEAP_COUNT GCDebugSpinLock() @@ -415,7 +413,7 @@ struct GCDebugSpinLock { , num_switch_thread(0), num_wait_longer(0), num_switch_thread_w(0), num_disable_preemptive_w(0) #endif #if defined(DYNAMIC_HEAP_COUNT) - , msl_wait_time(0), msl_wait_count(0) + , msl_wait_time(0) #endif //DYNAMIC_HEAP_COUNT { } @@ -1148,15 +1146,12 @@ class dynamic_data // // The following 3 fields are updated at the beginning of each GC, if that GC condemns this generation. // - // The number of GC that condemned this generation. The only difference between this - // and collection_count is just that collection_count is maintained for all physical generations - // (currently there are 5) whereas this is only updated for logical generations (there are 3). - size_t gc_clock; - uint64_t time_clock; //time when this gc started + size_t gc_clock; // the gc index + uint64_t time_clock; // time when this gc started uint64_t previous_time_clock; // time when previous gc started // Updated at the end of a GC, if that GC condemns this generation. - size_t gc_elapsed_time; // Time it took for the gc to complete + size_t gc_elapsed_time; // time it took for the gc to complete // // The following fields (and fields in sdata) are initialized during GC init time and do not change. 
@@ -1495,6 +1490,8 @@ class mark_queue_t void verify_empty(); }; +float median_of_3 (float a, float b, float c); + //class definition of the internal class class gc_heap { @@ -2422,6 +2419,7 @@ class gc_heap #ifndef USE_REGIONS PER_HEAP_METHOD void rearrange_heap_segments(BOOL compacting); #endif //!USE_REGIONS + PER_HEAP_METHOD void delay_free_segments(); PER_HEAP_ISOLATED_METHOD void distribute_free_regions(); #ifdef BACKGROUND_GC PER_HEAP_ISOLATED_METHOD void reset_write_watch_for_gc_heap(void* base_address, size_t region_size); @@ -2597,11 +2595,17 @@ class gc_heap // re-initialize a heap in preparation to putting it back into service PER_HEAP_METHOD void recommission_heap(); + PER_HEAP_ISOLATED_METHOD size_t get_num_completed_gcs(); + + PER_HEAP_ISOLATED_METHOD int calculate_new_heap_count(); + // check if we should change the heap count PER_HEAP_METHOD void check_heap_count(); - PER_HEAP_METHOD bool prepare_to_change_heap_count (int new_n_heaps); + PER_HEAP_ISOLATED_METHOD bool prepare_to_change_heap_count (int new_n_heaps); PER_HEAP_METHOD bool change_heap_count (int new_n_heaps); + + PER_HEAP_ISOLATED_METHOD size_t get_msl_wait_time(); #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -3778,6 +3782,13 @@ class gc_heap PER_HEAP_FIELD_MAINTAINED mark* loh_pinned_queue; #endif //FEATURE_LOH_COMPACTION +#ifdef DYNAMIC_HEAP_COUNT + PER_HEAP_FIELD_MAINTAINED GCEvent gc_idle_thread_event; +#ifdef BACKGROUND_GC + PER_HEAP_FIELD_MAINTAINED GCEvent bgc_idle_thread_event; +#endif //BACKGROUND_GC +#endif //DYNAMIC_HEAP_COUNT + /******************************************/ // PER_HEAP_FIELD_MAINTAINED_ALLOC fields // /******************************************/ @@ -4084,7 +4095,6 @@ class gc_heap // These 2 fields' values do not change but are set/unset per GC PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent gc_start_event; PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent ee_suspend_event; - PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent gc_idle_thread_event; // Also updated on the heap#0 GC thread because that's where we are actually doing the decommit. PER_HEAP_ISOLATED_FIELD_SINGLE_GC BOOL gradual_decommit_in_progress_p; @@ -4163,6 +4173,10 @@ class gc_heap PER_HEAP_ISOLATED_FIELD_SINGLE_GC uint8_t* gc_high; // high end of the highest region being condemned #endif //USE_REGIONS +#ifdef STRESS_DYNAMIC_HEAP_COUNT + PER_HEAP_ISOLATED_FIELD_SINGLE_GC int heaps_in_this_gc; +#endif //STRESS_DYNAMIC_HEAP_COUNT + /**************************************************/ // PER_HEAP_ISOLATED_FIELD_SINGLE_GC_ALLOC fields // /**************************************************/ @@ -4261,37 +4275,65 @@ class gc_heap #endif //USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT + // Sample collection - + // + // For every GC, we collect the msl wait time + GC pause duration info and use both to calculate the + // throughput cost percentage. We will also be using the wait time and the GC pause duration separately + // for other purposes in the future. + // + // For all gen2 GCs we also keep a separate array currently just for the GC cost. This serves as a backstop + // to smooth out the situation when we rarely pick the gen2 GCs in the first array. 
     struct dynamic_heap_count_data_t
     {
         static const int sample_size = 3;

         struct sample
         {
-            uint64_t elapsed_between_gcs;  // time between gcs in microseconds
-            uint64_t gc_elapsed_time;      // time the gc took
-            uint64_t soh_msl_wait_time;    // time the allocator spent waiting for the soh msl lock
-            uint64_t uoh_msl_wait_time;    // time the allocator spent waiting for the uoh msl lock
-            size_t allocating_thread_count;// number of allocating threads
-            size_t heap_size;
+            uint64_t elapsed_between_gcs;  // time between gcs in microseconds (this should really be between_pauses)
+            uint64_t gc_pause_time;        // pause time for this GC
+            uint64_t msl_wait_time;
         };

-        unsigned sample_index;
+        uint32_t sample_index;
         sample samples[sample_size];
+        size_t prev_num_completed_gcs;
+
+        uint32_t gen2_sample_index;
+        // This is (gc_elapsed_time / time between this and the last gen2 GC)
+        float gen2_gc_percents[sample_size];

-        float median_percent_overhead;          // estimated overhead of allocator + gc
-        float smoothed_median_percent_overhead; // exponentially smoothed version
-        float percent_heap_space_cost_per_heap; // percent space cost of adding a heap
-        float overhead_reduction_per_step_up;   // percentage effect on overhead of increasing heap count
-        float overhead_increase_per_step_down;  // percentage effect on overhead of decreasing heap count
-        float space_cost_increase_per_step_up;  // percentage effect on space of increasing heap count
-        float space_cost_decrease_per_step_down;// percentage effect on space of decreasing heap count
+        float median_throughput_cost_percent;          // estimated overhead of allocator + gc
+        float smoothed_median_throughput_cost_percent; // exponentially smoothed version
+        float percent_heap_space_cost_per_heap;        // percent space cost of adding a heap
+        float tcp_reduction_per_step_up;               // throughput cost percent effect of increasing heap count
+        float tcp_increase_per_step_down;              // throughput cost percent effect of decreasing heap count
+        float scp_increase_per_step_up;                // space cost percent effect of increasing heap count
+        float scp_decrease_per_step_down;              // space cost percent effect of decreasing heap count

         int new_n_heaps;
+        // the heap count we changed from
+        int last_n_heaps;
+        // don't start a GC until we see (n_max_heaps - new_n_heaps) threads idling
+        VOLATILE(int32_t) idle_thread_count;
+        bool init_only_p;
+
+        bool should_change_heap_count;
+        int heap_count_to_change_to;
+        int heap_count_change_count;

 #ifdef STRESS_DYNAMIC_HEAP_COUNT
         int lowest_heap_with_msl_uoh;
 #endif //STRESS_DYNAMIC_HEAP_COUNT
+
+        float get_median_gen2_gc_percent()
+        {
+            return median_of_3 (gen2_gc_percents[0], gen2_gc_percents[1], gen2_gc_percents[2]);
+        }
     };
     PER_HEAP_ISOLATED_FIELD_MAINTAINED dynamic_heap_count_data_t dynamic_heap_count_data;
+    PER_HEAP_ISOLATED_FIELD_MAINTAINED uint64_t last_suspended_end_time;
+    // If the last full GC is blocking, this is that GC's index; for BGC, this is the settings.gc_index
+    // when the BGC ended.
+ PER_HEAP_ISOLATED_FIELD_MAINTAINED size_t gc_index_full_gc_end; #endif //DYNAMIC_HEAP_COUNT /****************************************************/ @@ -4867,7 +4909,6 @@ uint64_t& dd_previous_time_clock (dynamic_data* inst) return inst->previous_time_clock; } - inline size_t& dd_gc_clock_interval (dynamic_data* inst) { diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 980d40a47ac318..1c48d1c52f0bb2 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5485,6 +5485,13 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeI { assert(!src->isContained()); // there must be one non-contained src + if (addr->isContained() && addr->OperIs(GT_LCL_ADDR)) + { + GenTreeLclVarCommon* lclVar = addr->AsLclVarCommon(); + emitIns_S_R(ins, attr, src->GetRegNum(), lclVar->GetLclNum(), lclVar->GetLclOffs()); + return; + } + // ind, reg id = emitNewInstrAmd(attr, offset); emitHandleMemOp(storeInd, id, emitInsModeFormat(ins, IF_ARD_RRD), ins); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 153f9b8bba8a82..3deada8eec085b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10807,8 +10807,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } #if defined(TARGET_XARCH) - case NI_AVX512F_Add: - case NI_AVX512BW_Add: case NI_AVX512F_And: case NI_AVX512DQ_And: case NI_AVX512F_AndNot: @@ -10850,13 +10848,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_AVX512F_Add: - case NI_AVX512BW_Add: - { - maskIntrinsicId = NI_AVX512F_AddMask; - break; - } - case NI_AVX512F_And: case NI_AVX512DQ_And: { diff --git a/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs b/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs index e8f4b606882cf9..5b734edf32738c 100644 --- a/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs +++ b/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs @@ -95,7 +95,8 @@ private static bool TryValidateOptions(object options, string qualifiedName, Lis foreach (PropertyInfo propertyInfo in options.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public)) { - if (propertyInfo.GetMethod is null) + // Indexers are properties which take parameters. Ignore them. 
+            if (propertyInfo.GetMethod is null || propertyInfo.GetMethod.GetParameters().Length > 0)
             {
                 continue;
             }
diff --git a/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs b/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs
index b644eea74120f7..6109bccd296463 100644
--- a/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs
+++ b/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs
@@ -177,6 +177,30 @@ public void TestValidationWithEnumeration()
             result2.Failures);
     }

+    [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
+    public void TestObjectsWithIndexerProperties()
+    {
+        DataAnnotationValidateOptions<MyDictionaryOptions> dataAnnotationValidateOptions1 = new("MyDictionaryOptions");
+        MyDictionaryOptionsOptionsValidator sourceGenOptionsValidator1 = new();
+
+        var options1 = new MyDictionaryOptions();
+        ValidateOptionsResult result1 = sourceGenOptionsValidator1.Validate("MyDictionaryOptions", options1);
+        ValidateOptionsResult result2 = dataAnnotationValidateOptions1.Validate("MyDictionaryOptions", options1);
+
+        Assert.True(result1.Succeeded);
+        Assert.True(result2.Succeeded);
+
+        DataAnnotationValidateOptions<MyListOptions<string>> dataAnnotationValidateOptions2 = new("MyListOptions");
+        MyListOptionsOptionsValidator sourceGenOptionsValidator2 = new();
+
+        var options2 = new MyListOptions<string>() { Prop = "test" };
+        result1 = sourceGenOptionsValidator2.Validate("MyListOptions", options2);
+        result2 = dataAnnotationValidateOptions2.Validate("MyListOptions", options2);
+
+        Assert.True(result1.Succeeded);
+        Assert.True(result2.Succeeded);
+    }
+
     [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
     public void TestValidationWithCyclicReferences()
     {
@@ -302,6 +326,12 @@ public partial class MySourceGenOptionsValidator : IValidateOptions<MySourceGenOptions>
     {
     }

+    public class MyDictionaryOptions : Dictionary<string, string> { [Required] public string Prop { get; set; } = "test"; }
+    [OptionsValidator] public partial class MyDictionaryOptionsOptionsValidator : IValidateOptions<MyDictionaryOptions> { }
+
+    public class MyListOptions<T> : List<T> { [Required] public T Prop { get; set; } = default; }
+    [OptionsValidator] public partial class MyListOptionsOptionsValidator : IValidateOptions<MyListOptions<string>> { }
+
 #if NET8_0_OR_GREATER
     public class OptionsUsingNewAttributes
     {
diff --git a/src/mono/wasm/README.md b/src/mono/wasm/README.md
index 34c18bc8711468..cdace086f5603e 100644
--- a/src/mono/wasm/README.md
+++ b/src/mono/wasm/README.md
@@ -350,3 +350,12 @@ npm update --lockfile-version=1
 | Multi-thread | linux: build only | none |

 * `high resource aot` runs a few specific library tests with AOT, that require more memory to AOT.
+
+
+# Perf pipeline
+
+TBD
+
+## Updates needed
+
+- when the base OS is upgraded, check whether the version of node installed in `eng/pipelines/coreclr/templates/run-performance-job.yml` needs an upgrade too.
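
Back in the `gcpriv.h` hunk above, the comments on `dynamic_heap_count_data_t` describe folding the allocator's msl wait time and the GC pause time into a single throughput cost percent per sample, with `get_median_gen2_gc_percent` smoothing the gen2 numbers through the `median_of_3` helper that this diff only declares. A standalone C# sketch of both ideas; the median body and the exact combining formula are assumptions, since neither appears in the hunks shown:

```csharp
using System;

// Standalone model of the per-GC sample from the gcpriv.h hunk above.
struct Sample
{
    public ulong ElapsedBetweenGcs; // microseconds between GCs
    public ulong GcPauseTime;       // pause time for this GC
    public ulong MslWaitTime;       // allocator wait on the more-space locks, summed over heaps
}

static class DynamicHeapCountModel
{
    // median of three = larger of the two smaller values
    public static float MedianOf3(float a, float b, float c)
        => Math.Max(Math.Min(a, b), Math.Min(Math.Max(a, b), c));

    // Assumption: the summed msl wait is divided by the heap count to approximate
    // the wait contributed per heap, then charged against the wall-clock interval.
    public static float ThroughputCostPercent(Sample s, int nHeaps)
        => s.ElapsedBetweenGcs == 0
            ? 0f
            : ((float)s.MslWaitTime / nHeaps + s.GcPauseTime) * 100f / s.ElapsedBetweenGcs;

    static void Main()
    {
        var s = new Sample { ElapsedBetweenGcs = 100_000, GcPauseTime = 2_000, MslWaitTime = 8_000 };
        Console.WriteLine($"tcp = {ThroughputCostPercent(s, 4):F2}%"); // (8000/4 + 2000) * 100 / 100000 = 4.00%
        Console.WriteLine($"median = {MedianOf3(3f, 1f, 2f)}");        // 2
    }
}
```

The max/min identity in `MedianOf3` avoids sorting; any three-value median would serve equally well here.
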
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs
index 10717b334174b9..b006bf7a93e5c3 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs
@@ -63,7 +63,7 @@ public static class MyDllImports
     public static extern int cpp_add(int a, int b);
 }}";

-        File.WriteAllText(Path.Combine(_projectDir!, "Pages", "MyDllImport.cs"), myDllImportCs);
+        File.WriteAllText(Path.Combine(_projectDir!, "Components", "Pages", "MyDllImport.cs"), myDllImportCs);
         AddItemsPropertiesToProject(projectFile, extraItems: @"<NativeFileReference Include=""mylib.cpp"" />");
         BlazorAddRazorButton("cpp_add", """
@@ -144,7 +144,7 @@ public void BugRegression_60479_WithRazorClassLib()
         Assert.Contains(razorClassLibraryFileName, lazyVal.EnumerateObject().Select(jp => jp.Name));
     }

-    private void BlazorAddRazorButton(string buttonText, string customCode, string methodName = "test", string razorPage = "Pages/Counter.razor")
+    private void BlazorAddRazorButton(string buttonText, string customCode, string methodName = "test", string razorPage = "Components/Pages/Counter.razor")
     {
         string additionalCode = $$"""

            <p role="{{methodName}}">Output: @outputText</p>

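Returning to the `DataAnnotationValidateOptions` fix earlier in the diff: reflection reports an indexer as just another property (named `Item` by default) whose getter takes parameters, which is exactly what the added `GetParameters().Length > 0` check screens out, and why the `Dictionary`- and `List`-derived options types in the new tests validate cleanly. The predicate in isolation:

```csharp
using System;
using System.Collections.Generic;
using System.Reflection;

class IndexerProbe
{
    static void Main()
    {
        foreach (PropertyInfo p in typeof(Dictionary<string, string>)
                     .GetProperties(BindingFlags.Instance | BindingFlags.Public))
        {
            // The indexer surfaces as "Item" with a one-parameter getter; every other
            // property has a parameterless getter and still gets validated.
            bool skipped = p.GetMethod is null || p.GetMethod.GetParameters().Length > 0;
            Console.WriteLine($"{p.Name}: skipped={skipped}");
        }
    }
}
```
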
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
index cbe3e461ec9cde..7ca663dce5372f 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
@@ -84,7 +84,7 @@ public async Task WorkloadNotRequiredForInvariantGlobalization(string config, bo
         if (invariant)
             AddItemsPropertiesToProject(projectFile, extraProperties: "<InvariantGlobalization>true</InvariantGlobalization>");

-        string counterPath = Path.Combine(Path.GetDirectoryName(projectFile)!, "Pages", "Counter.razor");
+        string counterPath = Path.Combine(Path.GetDirectoryName(projectFile)!, "Components", "Pages", "Counter.razor");
         string allText = File.ReadAllText(counterPath);
         string ccText = "currentCount++;";
         if (allText.IndexOf(ccText) < 0)
diff --git a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
index e520057d5b3bdf..62817719af0a14 100644
--- a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
+++ b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
@@ -49,7 +49,7 @@ public partial class InstallWorkloadFromArtifacts : Task
     private string _tempDir = string.Empty;
     private string _nugetCachePath = string.Empty;

-    [GeneratedRegex(@"^\d+\.\d+\.\d+(-[A-z]*\.*\d*)?")]
+    [GeneratedRegex(@"^\d+\.\d+\.\d+(-rtm|-[A-z]*\.*\d*)?")]
     private static partial Regex bandVersionRegex();

     public override bool Execute()
@@ -215,7 +215,7 @@ private bool InstallPacks(InstallWorkloadRequest req, string nugetConfigContents
         (int exitCode, string output) = Utils.TryRunProcess(
                                             Log,
                                             Path.Combine(req.TargetPath, "dotnet"),
-                                            $"workload install --skip-manifest-update --configfile \"{nugetConfigPath}\" --temp-dir \"{_tempDir}/workload-install-temp\" {req.WorkloadId}",
+                                            $"workload install --skip-manifest-update --skip-sign-check --configfile \"{nugetConfigPath}\" --temp-dir \"{_tempDir}/workload-install-temp\" {req.WorkloadId}",
                                             workingDir: _tempDir,
                                             envVars: new Dictionary<string, string> () {
                                                 ["NUGET_PACKAGES"] = _nugetCachePath
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs
new file mode 100644
index 00000000000000..9b4696e31fc16c
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using Xunit;
+
+public struct MutableStruct
+{
+    private long _internalValue;
+
+    public long InternalValue
+    {
+        get => Volatile.Read(ref _internalValue);
+        private set => Volatile.Write(ref _internalValue, value);
+    }
+
+    public void Add(long value) => AddInternal(value);
+    private void AddInternal(long value) => InternalValue += value;
+    public MutableStruct(long value) => InternalValue = value;
+}
+
+public static class Runtime_92218
+{
+    [Fact]
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    public static void Problem()
+    {
+        var test = new MutableStruct(420);
+        var from = new MutableStruct(42);
+
+        // wrapper.Ticks is -3, so each Add below steps test.InternalValue down from 420
+        var wrapper = -new TimeSpan(3);
+
+        // volatile read-modify-write through the accessors; terminates once InternalValue drops below 42
+        while (test.InternalValue >= from.InternalValue)
+        {
+            test.Add(wrapper.Ticks);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj
new file mode 100644
index 00000000000000..15edd99711a1a4
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj
@@ -0,0 +1,8 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
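
One more worked detail, on the `bandVersionRegex` change above: .NET regex alternation is ordered, so the new leading `-rtm` branch ends the match right after the prerelease label, where the old `-[A-z]*\.*\d*` branch would greedily continue into the date/build digits. A quick check (the version strings are illustrative inputs, not values from this diff):

```csharp
using System;
using System.Text.RegularExpressions;

class BandVersionProbe
{
    static void Main()
    {
        // Same pattern as the amended bandVersionRegex in InstallWorkloadFromArtifacts.
        var band = new Regex(@"^\d+\.\d+\.\d+(-rtm|-[A-z]*\.*\d*)?");
        foreach (string v in new[] { "8.0.100", "8.0.100-rtm.23470.1", "8.0.100-rc.2.23470.7" })
        {
            // Prints: 8.0.100 / 8.0.100-rtm / 8.0.100-rc.2
            Console.WriteLine($"{v,-22} -> '{band.Match(v).Value}'");
        }
    }
}
```
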