| /* |
| * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| */ |
| |
| #include "precompiled.hpp" |
| #include "gc/parallel/objectStartArray.inline.hpp" |
| #include "gc/parallel/parallelScavengeHeap.inline.hpp" |
| #include "gc/parallel/psCardTable.hpp" |
| #include "gc/parallel/psPromotionManager.inline.hpp" |
| #include "gc/parallel/psScavenge.inline.hpp" |
| #include "gc/parallel/psYoungGen.hpp" |
| #include "memory/iterator.inline.hpp" |
| #include "oops/access.inline.hpp" |
| #include "oops/oop.inline.hpp" |
| #include "runtime/prefetch.inline.hpp" |
| #include "utilities/spinYield.hpp" |
| #include "utilities/align.hpp" |
| |
| // Checks an individual oop for missing precise marks. Mark |
| // may be either dirty or newgen. |
| class CheckForUnmarkedOops : public BasicOopIterateClosure { |
| private: |
| PSYoungGen* _young_gen; |
| PSCardTable* _card_table; |
| HeapWord* _unmarked_addr; |
| |
| protected: |
| template <class T> void do_oop_work(T* p) { |
| oop obj = RawAccess<>::oop_load(p); |
| if (_young_gen->is_in_reserved(obj) && |
| !_card_table->addr_is_marked_imprecise(p)) { |
| // Don't overwrite the first missing card mark |
| if (_unmarked_addr == nullptr) { |
| _unmarked_addr = (HeapWord*)p; |
| } |
| } |
| } |
| |
| public: |
| CheckForUnmarkedOops(PSYoungGen* young_gen, PSCardTable* card_table) : |
| _young_gen(young_gen), _card_table(card_table), _unmarked_addr(nullptr) { } |
| |
| virtual void do_oop(oop* p) { CheckForUnmarkedOops::do_oop_work(p); } |
| virtual void do_oop(narrowOop* p) { CheckForUnmarkedOops::do_oop_work(p); } |
| |
| bool has_unmarked_oop() { |
| return _unmarked_addr != nullptr; |
| } |
| }; |
| |
| // Checks all objects for the existence of some type of mark, |
| // precise or imprecise, dirty or newgen. |
| class CheckForUnmarkedObjects : public ObjectClosure { |
| private: |
| PSYoungGen* _young_gen; |
| PSCardTable* _card_table; |
| |
| public: |
| CheckForUnmarkedObjects() { |
| ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); |
| _young_gen = heap->young_gen(); |
| _card_table = heap->card_table(); |
| } |
| |
| // Card marks are not precise. The current system can leave us with |
| // a mismatch of precise marks and beginning of object marks. This means |
| // we test for missing precise marks first. If any are found, we don't |
| // fail unless the object head is also unmarked. |
| virtual void do_object(oop obj) { |
| CheckForUnmarkedOops object_check(_young_gen, _card_table); |
| obj->oop_iterate(&object_check); |
| if (object_check.has_unmarked_oop()) { |
| guarantee(_card_table->addr_is_marked_imprecise(obj), "Found unmarked young_gen object"); |
| } |
| } |
| }; |
| |
| // Checks for precise marking of oops as newgen. |
| class CheckForPreciseMarks : public BasicOopIterateClosure { |
| private: |
| PSYoungGen* _young_gen; |
| PSCardTable* _card_table; |
| |
| protected: |
| template <class T> void do_oop_work(T* p) { |
| oop obj = RawAccess<IS_NOT_NULL>::oop_load(p); |
| if (_young_gen->is_in_reserved(obj)) { |
| assert(_card_table->addr_is_marked_precise(p), "Found unmarked precise oop"); |
| _card_table->set_card_newgen(p); |
| } |
| } |
| |
| public: |
| CheckForPreciseMarks(PSYoungGen* young_gen, PSCardTable* card_table) : |
| _young_gen(young_gen), _card_table(card_table) { } |
| |
| virtual void do_oop(oop* p) { CheckForPreciseMarks::do_oop_work(p); } |
| virtual void do_oop(narrowOop* p) { CheckForPreciseMarks::do_oop_work(p); } |
| }; |
| |
| static void prefetch_write(void *p) { |
| if (PrefetchScanIntervalInBytes >= 0) { |
| Prefetch::write(p, PrefetchScanIntervalInBytes); |
| } |
| } |
| |
| void PSCardTable::scan_obj_with_limit(PSPromotionManager* pm, |
| oop obj, |
| HeapWord* start, |
| HeapWord* end) { |
| if (!obj->is_typeArray()) { |
| prefetch_write(start); |
| pm->push_contents_bounded(obj, start, end); |
| } |
| } |
| |
| void PSCardTable::pre_scavenge(HeapWord* old_gen_bottom, uint active_workers) { |
| _preprocessing_active_workers = active_workers; |
| } |
| |
| // The "shadow" table is a copy of the card table entries of the current stripe. |
| // It is used to separate card reading, clearing and redirtying which reduces |
| // complexity significantly. |
| class PSStripeShadowCardTable { |
| typedef CardTable::CardValue CardValue; |
| |
| const uint _card_shift; |
| const uint _card_size; |
| CardValue _table[PSCardTable::num_cards_in_stripe]; |
| const CardValue* _table_base; |
| |
| public: |
| PSStripeShadowCardTable(PSCardTable* pst, HeapWord* const start, HeapWord* const end) : |
| _card_shift(CardTable::card_shift()), |
| _card_size(CardTable::card_size()), |
| _table_base(_table - (uintptr_t(start) >> _card_shift)) { |
| size_t stripe_byte_size = pointer_delta(end, start) * HeapWordSize; |
| size_t copy_length = align_up(stripe_byte_size, _card_size) >> _card_shift; |
| // The end of the last stripe may not be card aligned as it is equal to old |
| // gen top at scavenge start. We should not clear the card containing old gen |
| // top if not card aligned because there can be promoted objects on that |
| // same card. If it was marked dirty because of the promoted objects and we |
| // cleared it, we would loose a card mark. |
| size_t clear_length = align_down(stripe_byte_size, _card_size) >> _card_shift; |
| CardValue* stripe_start_card = pst->byte_for(start); |
| memcpy(_table, stripe_start_card, copy_length); |
| memset(stripe_start_card, CardTable::clean_card_val(), clear_length); |
| } |
| |
| HeapWord* addr_for(const CardValue* const card) { |
| assert(card >= _table && card <= &_table[PSCardTable::num_cards_in_stripe], "out of bounds"); |
| return (HeapWord*) ((card - _table_base) << _card_shift); |
| } |
| |
| const CardValue* card_for(HeapWord* addr) { |
| return &_table_base[uintptr_t(addr) >> _card_shift]; |
| } |
| |
| bool is_dirty(const CardValue* const card) { |
| return !is_clean(card); |
| } |
| |
| bool is_clean(const CardValue* const card) { |
| assert(card >= _table && card < &_table[PSCardTable::num_cards_in_stripe], "out of bounds"); |
| return *card == PSCardTable::clean_card_val(); |
| } |
| |
| const CardValue* find_first_dirty_card(const CardValue* const start, |
| const CardValue* const end) { |
| for (const CardValue* i = start; i < end; ++i) { |
| if (is_dirty(i)) { |
| return i; |
| } |
| } |
| return end; |
| } |
| |
| const CardValue* find_first_clean_card(const CardValue* const start, |
| const CardValue* const end) { |
| for (const CardValue* i = start; i < end; ++i) { |
| if (is_clean(i)) { |
| return i; |
| } |
| } |
| return end; |
| } |
| }; |
| |
| template <typename Func> |
| void PSCardTable::process_range(Func&& object_start, |
| PSPromotionManager* pm, |
| HeapWord* const start, |
| HeapWord* const end) { |
| assert(start < end, "precondition"); |
| assert(is_card_aligned(start), "precondition"); |
| |
| PSStripeShadowCardTable sct(this, start, end); |
| |
| // end might not be card-aligned. |
| const CardValue* end_card = sct.card_for(end - 1) + 1; |
| |
| for (HeapWord* i_addr = start; i_addr < end; /* empty */) { |
| const CardValue* dirty_l = sct.find_first_dirty_card(sct.card_for(i_addr), end_card); |
| const CardValue* dirty_r = sct.find_first_clean_card(dirty_l, end_card); |
| |
| assert(dirty_l <= dirty_r, "inv"); |
| |
| if (dirty_l == dirty_r) { |
| assert(dirty_r == end_card, "inv"); |
| break; |
| } |
| |
| // Located a non-empty dirty chunk [dirty_l, dirty_r). |
| HeapWord* addr_l = sct.addr_for(dirty_l); |
| HeapWord* addr_r = MIN2(sct.addr_for(dirty_r), end); |
| |
| // Scan objects overlapping [addr_l, addr_r) limited to [start, end). |
| HeapWord* obj_addr = object_start(addr_l); |
| |
| while (true) { |
| assert(obj_addr < addr_r, "inv"); |
| |
| oop obj = cast_to_oop(obj_addr); |
| const bool is_obj_array = obj->is_objArray(); |
| HeapWord* const obj_end_addr = obj_addr + obj->size(); |
| |
| if (is_obj_array) { |
| // Always scan obj arrays precisely (they are always marked precisely) |
| // to avoid unnecessary work. |
| scan_obj_with_limit(pm, obj, addr_l, addr_r); |
| } else { |
| if (obj_addr < i_addr && i_addr > start) { |
| // Already scanned this object. Has been one that spans multiple dirty chunks. |
| // The second condition makes sure objects reaching in the stripe are scanned once. |
| } else { |
| scan_obj_with_limit(pm, obj, addr_l, end); |
| } |
| } |
| |
| if (obj_end_addr >= addr_r) { |
| i_addr = is_obj_array ? addr_r : obj_end_addr; |
| break; |
| } |
| |
| // Move to next obj inside this dirty chunk. |
| obj_addr = obj_end_addr; |
| } |
| |
| // Finished a dirty chunk. |
| pm->drain_stacks_cond_depth(); |
| } |
| } |
| |
| template <typename Func> |
| void PSCardTable::preprocess_card_table_parallel(Func&& object_start, |
| HeapWord* old_gen_bottom, |
| HeapWord* old_gen_top, |
| uint stripe_index, |
| uint n_stripes) { |
| const size_t num_cards_in_slice = num_cards_in_stripe * n_stripes; |
| CardValue* cur_card = byte_for(old_gen_bottom) + stripe_index * num_cards_in_stripe; |
| CardValue* const end_card = byte_for(old_gen_top - 1) + 1; |
| |
| for (/* empty */; cur_card < end_card; cur_card += num_cards_in_slice) { |
| HeapWord* stripe_addr = addr_for(cur_card); |
| if (is_dirty(cur_card)) { |
| // The first card of this stripe is already dirty, no need to see if the |
| // reaching-in object is a potentially imprecisely marked non-array |
| // object. |
| continue; |
| } |
| HeapWord* first_obj_addr = object_start(stripe_addr); |
| if (first_obj_addr == stripe_addr) { |
| // No object reaching into this stripe. |
| continue; |
| } |
| oop first_obj = cast_to_oop(first_obj_addr); |
| if (!first_obj->is_array() && is_dirty(byte_for(first_obj_addr))) { |
| // Found a non-array object reaching into the stripe that has |
| // potentially been marked imprecisely. Mark first card of the stripe |
| // dirty so it will be processed later. |
| *cur_card = dirty_card_val(); |
| } |
| } |
| } |
| |
| // We get passed the space_top value to prevent us from traversing into |
| // the old_gen promotion labs, which cannot be safely parsed. |
| |
| // Do not call this method if the space is empty. |
| // It is a waste to start tasks and get here only to |
| // do no work. This method is just a no-op if space_top == sp->bottom(). |
| |
| // The generation (old gen) is divided into slices, which are further |
| // subdivided into stripes, with one stripe per GC thread. The size of |
| // a stripe is a constant, num_cards_in_stripe. |
| // |
| // +===============+ slice 0 |
| // | stripe 0 | |
| // +---------------+ |
| // | stripe 1 | |
| // +---------------+ |
| // | stripe 2 | |
| // +---------------+ |
| // | stripe 3 | |
| // +===============+ slice 1 |
| // | stripe 0 | |
| // +---------------+ |
| // | stripe 1 | |
| // +---------------+ |
| // | stripe 2 | |
| // +---------------+ |
| // | stripe 3 | |
| // +===============+ slice 2 |
| // ... |
| // |
| // In this case there are 4 threads, so 4 stripes. A GC thread first works on |
| // its stripe within slice 0 and then moves to its stripe in the next slice |
| // until it has exceeded the top of the generation. The distance to stripe in |
| // the next slice is calculated based on the number of stripes. After finishing |
| // stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by adding |
| // slice_size_in_words to the start of stripe 0 in slice 0 to get to the start |
| // of stripe 0 in slice 1. |
| |
| // Scavenging and accesses to the card table are strictly limited to the stripe. |
| // In particular scavenging of an object crossing stripe boundaries is shared |
| // among the threads assigned to the stripes it resides on. This reduces |
| // complexity and enables shared scanning of large objects. |
| // It requires preprocessing of the card table though where imprecise card marks of |
| // objects crossing stripe boundaries are propagated to the first card of |
| // each stripe covered by the individual object. |
| |
| void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array, |
| HeapWord* old_gen_bottom, |
| HeapWord* old_gen_top, |
| PSPromotionManager* pm, |
| uint stripe_index, |
| uint n_stripes) { |
| // ObjectStartArray queries can be expensive for large objects. We cache known objects. |
| struct { |
| HeapWord* start_addr; |
| HeapWord* end_addr; |
| } cached_obj {nullptr, old_gen_bottom}; |
| |
| // Queries must be monotonic because we don't check addr >= cached_obj.start_addr. |
| auto object_start = [&] (HeapWord* addr) { |
| if (addr < cached_obj.end_addr) { |
| assert(cached_obj.start_addr != nullptr, "inv"); |
| return cached_obj.start_addr; |
| } |
| HeapWord* result = start_array->object_start(addr); |
| |
| cached_obj.start_addr = result; |
| cached_obj.end_addr = result + cast_to_oop(result)->size(); |
| |
| return result; |
| }; |
| |
| // Prepare scavenge. |
| preprocess_card_table_parallel(object_start, old_gen_bottom, old_gen_top, stripe_index, n_stripes); |
| |
| // Sync with other workers. |
| Atomic::dec(&_preprocessing_active_workers); |
| SpinYield spin_yield; |
| while (Atomic::load_acquire(&_preprocessing_active_workers) > 0) { |
| spin_yield.wait(); |
| } |
| |
| // Scavenge |
| cached_obj = {nullptr, old_gen_bottom}; |
| const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words; |
| const size_t slice_size_in_words = stripe_size_in_words * n_stripes; |
| HeapWord* cur_addr = old_gen_bottom + stripe_index * stripe_size_in_words; |
| for (/* empty */; cur_addr < old_gen_top; cur_addr += slice_size_in_words) { |
| HeapWord* const stripe_l = cur_addr; |
| HeapWord* const stripe_r = MIN2(cur_addr + stripe_size_in_words, |
| old_gen_top); |
| |
| process_range(object_start, pm, stripe_l, stripe_r); |
| } |
| } |
| |
| // This should be called before a scavenge. |
| void PSCardTable::verify_all_young_refs_imprecise() { |
| CheckForUnmarkedObjects check; |
| |
| ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); |
| PSOldGen* old_gen = heap->old_gen(); |
| |
| old_gen->object_iterate(&check); |
| } |
| |
| // This should be called immediately after a scavenge, before mutators resume. |
| void PSCardTable::verify_all_young_refs_precise() { |
| ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); |
| PSOldGen* old_gen = heap->old_gen(); |
| |
| CheckForPreciseMarks check(heap->young_gen(), this); |
| |
| old_gen->oop_iterate(&check); |
| |
| verify_all_young_refs_precise_helper(old_gen->object_space()->used_region()); |
| } |
| |
| void PSCardTable::verify_all_young_refs_precise_helper(MemRegion mr) { |
| CardValue* bot = byte_for(mr.start()); |
| CardValue* top = byte_for(mr.end()); |
| while (bot <= top) { |
| assert(*bot == clean_card || *bot == verify_card, "Found unwanted or unknown card mark"); |
| if (*bot == verify_card) |
| *bot = youngergen_card; |
| bot++; |
| } |
| } |
| |
| bool PSCardTable::addr_is_marked_imprecise(void *addr) { |
| CardValue* p = byte_for(addr); |
| CardValue val = *p; |
| |
| if (card_is_dirty(val)) |
| return true; |
| |
| if (card_is_newgen(val)) |
| return true; |
| |
| if (card_is_clean(val)) |
| return false; |
| |
| assert(false, "Found unhandled card mark type"); |
| |
| return false; |
| } |
| |
| // Also includes verify_card |
| bool PSCardTable::addr_is_marked_precise(void *addr) { |
| CardValue* p = byte_for(addr); |
| CardValue val = *p; |
| |
| if (card_is_newgen(val)) |
| return true; |
| |
| if (card_is_verify(val)) |
| return true; |
| |
| if (card_is_clean(val)) |
| return false; |
| |
| if (card_is_dirty(val)) |
| return false; |
| |
| assert(false, "Found unhandled card mark type"); |
| |
| return false; |
| } |
| |
| bool PSCardTable::is_in_young(const void* p) const { |
| return ParallelScavengeHeap::heap()->is_in_young(p); |
| } |