/*
 * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.inline.hpp"
#include "gc/shared/gc_globals.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.hpp"
#include "runtime/java.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include <math.h>

G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
  G1ConcurrentRefineThread* result = nullptr;
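  // InjectGCWorkerCreationFailure is a testing flag for exercising this
  // failure path; injection is bypassed for threads created during
  // initialization.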
  if (initializing || !InjectGCWorkerCreationFailure) {
    result = G1ConcurrentRefineThread::create(_cr, worker_id);
  }
  if (result == nullptr || result->osthread() == nullptr) {
    log_warning(gc)("Failed to create refinement thread %u, no more %s",
                    worker_id,
                    result == nullptr ? "memory" : "OS threads");
    if (result != nullptr) {
      delete result;
      result = nullptr;
    }
  }
  return result;
}

G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
  _cr(nullptr),
  _threads(nullptr),
  _max_num_threads(0)
{}

G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
  if (_threads != nullptr) {
    for (uint i = 0; i < _max_num_threads; i++) {
      G1ConcurrentRefineThread* t = _threads[i];
      if (t == nullptr) {
#ifdef ASSERT
        for (uint j = i + 1; j < _max_num_threads; ++j) {
          assert(_threads[j] == nullptr, "invariant");
        }
#endif // ASSERT
        break;
      } else {
        delete t;
      }
    }
    FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
  }
}

jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint max_num_threads) {
  assert(cr != nullptr, "G1ConcurrentRefine must not be null");
  _cr = cr;
  _max_num_threads = max_num_threads;

  if (max_num_threads > 0) {
    _threads = NEW_C_HEAP_ARRAY(G1ConcurrentRefineThread*, max_num_threads, mtGC);

    _threads[0] = create_refinement_thread(0, true);
    if (_threads[0] == nullptr) {
      vm_shutdown_during_initialization("Could not allocate primary refinement thread");
      return JNI_ENOMEM;
    }

    if (UseDynamicNumberOfGCThreads) {
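      // Additional threads are created lazily, on demand, by activate().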
      for (uint i = 1; i < max_num_threads; ++i) {
        _threads[i] = nullptr;
      }
    } else {
      for (uint i = 1; i < max_num_threads; ++i) {
        _threads[i] = create_refinement_thread(i, true);
        if (_threads[i] == nullptr) {
          vm_shutdown_during_initialization("Could not allocate refinement threads.");
          return JNI_ENOMEM;
        }
      }
    }
  }

  return JNI_OK;
}

#ifdef ASSERT
void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
  assert(_threads != nullptr, "No threads");
  assert(Thread::current() == _threads[0], "Not primary thread");
}
#endif // ASSERT

bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
  assert(worker_id < _max_num_threads, "precondition");
  G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
  if (thread_to_activate == nullptr) {
    thread_to_activate = create_refinement_thread(worker_id, false);
    if (thread_to_activate == nullptr) {
      return false;
    }
    _threads[worker_id] = thread_to_activate;
  }
  thread_to_activate->activate();
  return true;
}

void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
  for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
      tc->do_thread(_threads[i]);
    }
  }
}

void G1ConcurrentRefineThreadControl::stop() {
  for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
      _threads[i]->stop();
    }
  }
}

uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
  // Instead of a fixed value, this could be a command line option. But then
  // we might also want to allow configuration of adjust_threads_wait_ms().
  return 50;
}

static size_t minimum_pending_cards_target() {
  // One buffer per thread.
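  // For example, with ParallelGCThreads = 8 and the default G1UpdateBufferSize
  // of 256 (illustrative values), the minimum target is 8 * 256 = 2048 cards.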
  return ParallelGCThreads * G1UpdateBufferSize;
}

G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
  _policy(policy),
  _threads_wanted(0),
  _pending_cards_target(PendingCardsTargetUninitialized),
  _last_adjust(),
  _needs_adjust(false),
  _threads_needed(policy, adjust_threads_period_ms()),
  _thread_control(),
  _dcqs(G1BarrierSet::dirty_card_queue_set())
{}

jint G1ConcurrentRefine::initialize() {
  return _thread_control.initialize(this, max_num_threads());
}

G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
  G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
  *ecode = cr->initialize();
  if (*ecode != 0) {
    delete cr;
    cr = nullptr;
  }
  return cr;
}

void G1ConcurrentRefine::stop() {
  _thread_control.stop();
}

G1ConcurrentRefine::~G1ConcurrentRefine() {
}

void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
  _thread_control.worker_threads_do(tc);
}

uint G1ConcurrentRefine::max_num_threads() {
  return G1ConcRefinementThreads;
}

void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
                                                     size_t processed_logged_cards,
                                                     size_t predicted_thread_buffer_cards,
                                                     double goal_ms) {
  size_t minimum = minimum_pending_cards_target();
  if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
    log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
                                _pending_cards_target);
    return;
  }

  // Base the pending cards budget on the measured rate.
  double rate = processed_logged_cards / logged_cards_time_ms;
  size_t budget = static_cast<size_t>(goal_ms * rate);
  // Deduct predicted cards in thread buffers to get target.
  size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
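  // For example (illustrative numbers): processing 5000 cards in 2.0 ms gives
  // a rate of 2500 cards/ms; a 10.0 ms goal then allows a budget of 25000
  // cards, and deducting 3000 predicted buffer cards leaves a target of
  // 22000. The MIN2 clamps the deduction so the target can't underflow.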
  // Add some hysteresis with previous values.
  if (is_pending_cards_target_initialized()) {
    new_target = (new_target + _pending_cards_target) / 2;
  }
  // Apply minimum target.
  new_target = MAX2(new_target, minimum_pending_cards_target());
  _pending_cards_target = new_target;
  log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
}

void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
                                         size_t processed_logged_cards,
                                         size_t predicted_thread_buffer_cards,
                                         double goal_ms) {
  if (!G1UseConcRefinement) return;

  update_pending_cards_target(logged_cards_time_ms,
                              processed_logged_cards,
                              predicted_thread_buffer_cards,
                              goal_ms);
  if (_thread_control.max_num_threads() == 0) {
    // If there are no refinement threads, the mutator threshold is the target.
    _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
  } else {
    // Provisionally make the mutator threshold unlimited, to be updated by
    // the next periodic adjustment. Because card state may have changed
    // drastically, record that adjustment is needed and kick the primary
    // thread, in case it is waiting.
    _dcqs.set_mutator_refinement_threshold(SIZE_MAX);
    _needs_adjust = true;
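    // Skip the kick during startup, before the first GC has initialized the
    // target; no refinement is done then (see adjust_threads_wait_ms()).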
    if (is_pending_cards_target_initialized()) {
      _thread_control.activate(0);
    }
  }
}

// Wake up the primary thread less frequently when the time available until
// the next GC is longer. But don't increase the wait time too rapidly.
// This reduces the number of primary thread wakeups that just immediately
// go back to waiting, while still being responsive to behavior changes.
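// For example, sqrt(100 ms) * 4 = 40 ms while sqrt(400 ms) * 4 = 80 ms:
// quadrupling the available time only doubles the wait.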
static uint64_t compute_adjust_wait_time_ms(double available_ms) {
  return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
}

uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
  assert_current_thread_is_primary_refinement_thread();
  if (is_pending_cards_target_initialized()) {
    double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
    uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
    return MAX2(wait_time_ms, adjust_threads_period_ms());
  } else {
    // If the target is not yet initialized, wait forever (until explicitly
    // activated). This happens during startup, when we don't bother with
    // refinement.
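    // (A timeout of zero is treated as an untimed wait by the waiting code.)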
    return 0;
  }
}

class G1ConcurrentRefine::RemSetSamplingClosure : public HeapRegionClosure {
  G1CollectionSet* _cset;
  size_t _sampled_rs_length;

public:
  explicit RemSetSamplingClosure(G1CollectionSet* cset) :
    _cset(cset), _sampled_rs_length(0) {}

  bool do_heap_region(HeapRegion* r) override {
    size_t rs_length = r->rem_set()->occupied();
    _sampled_rs_length += rs_length;
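    // Returning false continues the iteration over the remaining regions.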
    return false;
  }

  size_t sampled_rs_length() const { return _sampled_rs_length; }
};

// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
// how much time the next GC can take, and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC pause is spent on
// scanning the remembered sets (among many other components), so this thread
// constantly reevaluates the prediction for the remembered set scanning
// costs and potentially resizes the young gen. This may trigger a premature
// GC or even increase the young gen size to keep the pause time goal.
void G1ConcurrentRefine::adjust_young_list_target_length() {
  if (_policy->use_adaptive_young_list_length()) {
    G1CollectionSet* cset = G1CollectedHeap::heap()->collection_set();
    RemSetSamplingClosure cl{cset};
    cset->iterate(&cl);
    _policy->revise_young_list_target_length(cl.sampled_rs_length());
  }
}

bool G1ConcurrentRefine::adjust_threads_periodically() {
  assert_current_thread_is_primary_refinement_thread();

  // Check whether it's time to do a periodic adjustment.
  if (!_needs_adjust) {
    Tickspan since_adjust = Ticks::now() - _last_adjust;
    if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
      _needs_adjust = true;
    }
  }

  // If needed, try to adjust threads wanted.
  if (_needs_adjust) {
    // Getting used young bytes requires holding Heap_lock. But we can't use a
    // normal lock and block until it is available. Blocking on the lock could
    // deadlock with a GC VMOp that is holding the lock and requesting a
    // safepoint. Instead try to lock, and if that fails, skip the adjustment
    // for this iteration of the thread, do some refinement work, and retry
    // the adjustment later.
    if (Heap_lock->try_lock()) {
      size_t used_bytes = _policy->estimate_used_young_bytes_locked();
      Heap_lock->unlock();
      adjust_young_list_target_length();
      size_t young_bytes = _policy->young_list_target_length() * HeapRegion::GrainBytes;
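      // Clamp with MIN2 so available_bytes can't underflow when used_bytes
      // exceeds the new target.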
      size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
      adjust_threads_wanted(available_bytes);
      _needs_adjust = false;
      _last_adjust = Ticks::now();
      return true;
    }
  }

  return false;
}

bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
  return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
}

void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
  assert_current_thread_is_primary_refinement_thread();
  size_t num_cards = _dcqs.num_cards();
  size_t mutator_threshold = SIZE_MAX;
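  // _threads_wanted is read concurrently by worker threads (see
  // is_thread_wanted), so it is accessed atomically.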
  uint old_wanted = Atomic::load(&_threads_wanted);

  _threads_needed.update(old_wanted,
                         available_bytes,
                         num_cards,
                         _pending_cards_target);
  uint new_wanted = _threads_needed.threads_needed();
  if (new_wanted > _thread_control.max_num_threads()) {
    // If running all the threads can't reach the goal, turn on refinement by
    // mutator threads. Using the target as the threshold may be stronger
    // than required, but it will do the most to get us under the goal, and
    // we'll reevaluate with the next adjustment.
    mutator_threshold = _pending_cards_target;
    new_wanted = _thread_control.max_num_threads();
  } else if (is_in_last_adjustment_period()) {
    // If very little time remains until GC, enable mutator refinement. If
    // the target has been reached, this keeps the number of pending cards on
    // target even if refinement threads deactivate in the meantime. And if
    // the target hasn't been reached, this prevents things from getting
    // worse.
    mutator_threshold = _pending_cards_target;
  }
  Atomic::store(&_threads_wanted, new_wanted);
  _dcqs.set_mutator_refinement_threshold(mutator_threshold);
  log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
                        "predicted: %zu, time: %1.2fms",
                        new_wanted,
                        num_cards,
                        _threads_needed.predicted_cards_at_next_gc(),
                        _threads_needed.predicted_time_until_next_gc_ms());
  // Activate newly wanted threads. The current thread is the primary
  // refinement thread, so is already active.
  for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
    if (!_thread_control.activate(i)) {
      // Failed to allocate and activate thread. Stop trying to activate, and
      // instead use mutator threads to make up the gap.
      Atomic::store(&_threads_wanted, i);
      _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
      break;
    }
  }
}

void G1ConcurrentRefine::reduce_threads_wanted() {
  assert_current_thread_is_primary_refinement_thread();
  if (!_needs_adjust) { // Defer if adjustment request is active.
    uint wanted = Atomic::load(&_threads_wanted);
    if (wanted > 0) {
      Atomic::store(&_threads_wanted, --wanted);
    }
    // If very little time remains until GC, enable mutator refinement. If
    // the target has been reached, this keeps the number of pending cards on
    // target even as refinement threads deactivate in the meantime.
    if (is_in_last_adjustment_period()) {
      _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
    }
  }
}

bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
  return worker_id < Atomic::load(&_threads_wanted);
}

bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
  assert_current_thread_is_primary_refinement_thread();
  return _needs_adjust;
}

void G1ConcurrentRefine::record_thread_adjustment_needed() {
  assert_current_thread_is_primary_refinement_thread();
  _needs_adjust = true;
}

G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
  struct CollectStats : public ThreadClosure {
    G1ConcurrentRefineStats _total_stats;
    virtual void do_thread(Thread* t) {
      G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
      G1ConcurrentRefineStats& stats = *crt->refinement_stats();
      _total_stats += stats;
      stats.reset();
    }
  } collector;
  threads_do(&collector);
  return collector._total_stats;
}

uint G1ConcurrentRefine::worker_id_offset() {
  return G1DirtyCardQueueSet::num_par_ids();
}

bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
                                             size_t stop_at,
                                             G1ConcurrentRefineStats* stats) {
  uint adjusted_id = worker_id + worker_id_offset();
  return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
}