src/hotspot/share/gc/parallel/psCardTable.cpp - toolchain/jdk/jdk21 - Git at Google

 /*
  * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  *
  */

 #include "precompiled.hpp"
 #include "gc/parallel/objectStartArray.inline.hpp"
 #include "gc/parallel/parallelScavengeHeap.inline.hpp"
 #include "gc/parallel/psCardTable.hpp"
 #include "gc/parallel/psPromotionManager.inline.hpp"
 #include "gc/parallel/psScavenge.inline.hpp"
 #include "gc/parallel/psYoungGen.hpp"
 #include "memory/iterator.inline.hpp"
 #include "oops/access.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 #include "utilities/spinYield.hpp"
 #include "utilities/align.hpp"

 // Checks an individual oop for missing precise marks. Mark
 // may be either dirty or newgen.
 class CheckForUnmarkedOops : public BasicOopIterateClosure {
  private:
   PSYoungGen*  _young_gen;
   PSCardTable* _card_table;
   HeapWord*    _unmarked_addr;

  protected:
   template <class T> void do_oop_work(T* p) {
     oop obj = RawAccess<>::oop_load(p);
     if (_young_gen->is_in_reserved(obj) &&
         !_card_table->addr_is_marked_imprecise(p)) {
       // Don't overwrite the first missing card mark
       if (_unmarked_addr == nullptr) {
         _unmarked_addr = (HeapWord*)p;
       }
     }
   }

  public:
   CheckForUnmarkedOops(PSYoungGen* young_gen, PSCardTable* card_table) :
     _young_gen(young_gen), _card_table(card_table), _unmarked_addr(nullptr) { }

   virtual void do_oop(oop* p)       { CheckForUnmarkedOops::do_oop_work(p); }
   virtual void do_oop(narrowOop* p) { CheckForUnmarkedOops::do_oop_work(p); }

   bool has_unmarked_oop() {
     return _unmarked_addr != nullptr;
   }
 };

 // Checks all objects for the existence of some type of mark,
 // precise or imprecise, dirty or newgen.
 class CheckForUnmarkedObjects : public ObjectClosure {
  private:
   PSYoungGen*  _young_gen;
   PSCardTable* _card_table;

  public:
   CheckForUnmarkedObjects() {
     ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
     _young_gen = heap->young_gen();
     _card_table = heap->card_table();
   }

   // Card marks are not precise. The current system can leave us with
   // a mismatch of precise marks and beginning of object marks. This means
   // we test for missing precise marks first. If any are found, we don't
   // fail unless the object head is also unmarked.
   virtual void do_object(oop obj) {
     CheckForUnmarkedOops object_check(_young_gen, _card_table);
     obj->oop_iterate(&object_check);
     if (object_check.has_unmarked_oop()) {
       guarantee(_card_table->addr_is_marked_imprecise(obj), "Found unmarked young_gen object");
     }
   }
 };

 // Checks for precise marking of oops as newgen.
 class CheckForPreciseMarks : public BasicOopIterateClosure {
  private:
   PSYoungGen*  _young_gen;
   PSCardTable* _card_table;

  protected:
   template <class T> void do_oop_work(T* p) {
     oop obj = RawAccess<IS_NOT_NULL>::oop_load(p);
     if (_young_gen->is_in_reserved(obj)) {
       assert(_card_table->addr_is_marked_precise(p), "Found unmarked precise oop");
       _card_table->set_card_newgen(p);
     }
   }

  public:
   CheckForPreciseMarks(PSYoungGen* young_gen, PSCardTable* card_table) :
     _young_gen(young_gen), _card_table(card_table) { }

   virtual void do_oop(oop* p)       { CheckForPreciseMarks::do_oop_work(p); }
   virtual void do_oop(narrowOop* p) { CheckForPreciseMarks::do_oop_work(p); }
 };

 static void prefetch_write(void *p) {
   if (PrefetchScanIntervalInBytes >= 0) {
     Prefetch::write(p, PrefetchScanIntervalInBytes);
   }
 }

 void PSCardTable::scan_obj_with_limit(PSPromotionManager* pm,
                                       oop obj,
                                       HeapWord* start,
                                       HeapWord* end) {
   if (!obj->is_typeArray()) {
     prefetch_write(start);
     pm->push_contents_bounded(obj, start, end);
   }
 }

 void PSCardTable::pre_scavenge(HeapWord* old_gen_bottom, uint active_workers) {
   _preprocessing_active_workers = active_workers;
 }

 // The "shadow" table is a copy of the card table entries of the current stripe.
 // It is used to separate card reading, clearing and redirtying which reduces
 // complexity significantly.
 class PSStripeShadowCardTable {
   typedef CardTable::CardValue CardValue;

   const uint _card_shift;
   const uint _card_size;
   CardValue _table[PSCardTable::num_cards_in_stripe];
   const CardValue* _table_base;

 public:
   PSStripeShadowCardTable(PSCardTable* pst, HeapWord* const start, HeapWord* const end) :
     _card_shift(CardTable::card_shift()),
     _card_size(CardTable::card_size()),
     _table_base(_table - (uintptr_t(start) >> _card_shift)) {
     size_t stripe_byte_size = pointer_delta(end, start) * HeapWordSize;
     size_t copy_length = align_up(stripe_byte_size, _card_size) >> _card_shift;
     // The end of the last stripe may not be card aligned as it is equal to old
     // gen top at scavenge start. We should not clear the card containing old gen
     // top if not card aligned because there can be promoted objects on that
     // same card. If it was marked dirty because of the promoted objects and we
     // cleared it, we would loose a card mark.
     size_t clear_length = align_down(stripe_byte_size, _card_size) >> _card_shift;
     CardValue* stripe_start_card = pst->byte_for(start);
     memcpy(_table, stripe_start_card, copy_length);
     memset(stripe_start_card, CardTable::clean_card_val(), clear_length);
   }

   HeapWord* addr_for(const CardValue* const card) {
     assert(card >= _table && card <=  &_table[PSCardTable::num_cards_in_stripe], "out of bounds");
     return (HeapWord*) ((card - _table_base) << _card_shift);
   }

   const CardValue* card_for(HeapWord* addr) {
     return &_table_base[uintptr_t(addr) >> _card_shift];
   }

   bool is_dirty(const CardValue* const card) {
     return !is_clean(card);
   }

   bool is_clean(const CardValue* const card) {
     assert(card >= _table && card <  &_table[PSCardTable::num_cards_in_stripe], "out of bounds");
     return *card == PSCardTable::clean_card_val();
   }

   const CardValue* find_first_dirty_card(const CardValue* const start,
                                          const CardValue* const end) {
     for (const CardValue* i = start; i < end; ++i) {
       if (is_dirty(i)) {
         return i;
       }
     }
     return end;
   }

   const CardValue* find_first_clean_card(const CardValue* const start,
                                          const CardValue* const end) {
     for (const CardValue* i = start; i < end; ++i) {
       if (is_clean(i)) {
         return i;
       }
     }
     return end;
   }
 };

 template <typename Func>
 void PSCardTable::process_range(Func&& object_start,
                                 PSPromotionManager* pm,
                                 HeapWord* const start,
                                 HeapWord* const end) {
   assert(start < end, "precondition");
   assert(is_card_aligned(start), "precondition");

   PSStripeShadowCardTable sct(this, start, end);

   // end might not be card-aligned.
   const CardValue* end_card = sct.card_for(end - 1) + 1;

   for (HeapWord* i_addr = start; i_addr < end; /* empty */) {
     const CardValue* dirty_l = sct.find_first_dirty_card(sct.card_for(i_addr), end_card);
     const CardValue* dirty_r = sct.find_first_clean_card(dirty_l, end_card);

     assert(dirty_l <= dirty_r, "inv");

     if (dirty_l == dirty_r) {
       assert(dirty_r == end_card, "inv");
       break;
     }

     // Located a non-empty dirty chunk [dirty_l, dirty_r).
     HeapWord* addr_l = sct.addr_for(dirty_l);
     HeapWord* addr_r = MIN2(sct.addr_for(dirty_r), end);

     // Scan objects overlapping [addr_l, addr_r) limited to [start, end).
     HeapWord* obj_addr = object_start(addr_l);

     while (true) {
       assert(obj_addr < addr_r, "inv");

       oop obj = cast_to_oop(obj_addr);
       const bool is_obj_array = obj->is_objArray();
       HeapWord* const obj_end_addr = obj_addr + obj->size();

       if (is_obj_array) {
         // Always scan obj arrays precisely (they are always marked precisely)
         // to avoid unnecessary work.
         scan_obj_with_limit(pm, obj, addr_l, addr_r);
       } else {
         if (obj_addr < i_addr && i_addr > start) {
           // Already scanned this object. Has been one that spans multiple dirty chunks.
           // The second condition makes sure objects reaching in the stripe are scanned once.
         } else {
           scan_obj_with_limit(pm, obj, addr_l, end);
         }
       }

       if (obj_end_addr >= addr_r) {
         i_addr = is_obj_array ? addr_r : obj_end_addr;
         break;
       }

       // Move to next obj inside this dirty chunk.
       obj_addr = obj_end_addr;
     }

     // Finished a dirty chunk.
     pm->drain_stacks_cond_depth();
   }
 }

 template <typename Func>
 void PSCardTable::preprocess_card_table_parallel(Func&& object_start,
                                                  HeapWord* old_gen_bottom,
                                                  HeapWord* old_gen_top,
                                                  uint stripe_index,
                                                  uint n_stripes) {
   const size_t num_cards_in_slice = num_cards_in_stripe * n_stripes;
   CardValue* cur_card = byte_for(old_gen_bottom) + stripe_index * num_cards_in_stripe;
   CardValue* const end_card = byte_for(old_gen_top - 1) + 1;

   for (/* empty */; cur_card < end_card; cur_card += num_cards_in_slice) {
     HeapWord* stripe_addr = addr_for(cur_card);
     if (is_dirty(cur_card)) {
       // The first card of this stripe is already dirty, no need to see if the
       // reaching-in object is a potentially imprecisely marked non-array
       // object.
       continue;
     }
     HeapWord* first_obj_addr = object_start(stripe_addr);
     if (first_obj_addr == stripe_addr) {
       // No object reaching into this stripe.
       continue;
     }
     oop first_obj = cast_to_oop(first_obj_addr);
     if (!first_obj->is_array() && is_dirty(byte_for(first_obj_addr))) {
       // Found a non-array object reaching into the stripe that has
       // potentially been marked imprecisely. Mark first card of the stripe
       // dirty so it will be processed later.
       *cur_card = dirty_card_val();
     }
   }
 }

 // We get passed the space_top value to prevent us from traversing into
 // the old_gen promotion labs, which cannot be safely parsed.

 // Do not call this method if the space is empty.
 // It is a waste to start tasks and get here only to
 // do no work. This method is just a no-op if space_top == sp->bottom().

 // The generation (old gen) is divided into slices, which are further
 // subdivided into stripes, with one stripe per GC thread. The size of
 // a stripe is a constant, num_cards_in_stripe.
 //
 //      +===============+        slice 0
 //      |  stripe 0     |
 //      +---------------+
 //      |  stripe 1     |
 //      +---------------+
 //      |  stripe 2     |
 //      +---------------+
 //      |  stripe 3     |
 //      +===============+        slice 1
 //      |  stripe 0     |
 //      +---------------+
 //      |  stripe 1     |
 //      +---------------+
 //      |  stripe 2     |
 //      +---------------+
 //      |  stripe 3     |
 //      +===============+        slice 2
 //      ...
 //
 // In this case there are 4 threads, so 4 stripes.  A GC thread first works on
 // its stripe within slice 0 and then moves to its stripe in the next slice
 // until it has exceeded the top of the generation.  The distance to stripe in
 // the next slice is calculated based on the number of stripes. After finishing
 // stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by adding
 // slice_size_in_words to the start of stripe 0 in slice 0 to get to the start
 // of stripe 0 in slice 1.

 // Scavenging and accesses to the card table are strictly limited to the stripe.
 // In particular scavenging of an object crossing stripe boundaries is shared
 // among the threads assigned to the stripes it resides on. This reduces
 // complexity and enables shared scanning of large objects.
 // It requires preprocessing of the card table though where imprecise card marks of
 // objects crossing stripe boundaries are propagated to the first card of
 // each stripe covered by the individual object.

 void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
                                              HeapWord* old_gen_bottom,
                                              HeapWord* old_gen_top,
                                              PSPromotionManager* pm,
                                              uint stripe_index,
                                              uint n_stripes) {
   // ObjectStartArray queries can be expensive for large objects. We cache known objects.
   struct {
     HeapWord* start_addr;
     HeapWord* end_addr;
   } cached_obj {nullptr, old_gen_bottom};

   // Queries must be monotonic because we don't check addr >= cached_obj.start_addr.
   auto object_start = [&] (HeapWord* addr) {
     if (addr < cached_obj.end_addr) {
       assert(cached_obj.start_addr != nullptr, "inv");
       return cached_obj.start_addr;
     }
     HeapWord* result = start_array->object_start(addr);

     cached_obj.start_addr = result;
     cached_obj.end_addr = result + cast_to_oop(result)->size();

     return result;
   };

   // Prepare scavenge.
   preprocess_card_table_parallel(object_start, old_gen_bottom, old_gen_top, stripe_index, n_stripes);

   // Sync with other workers.
   Atomic::dec(&_preprocessing_active_workers);
   SpinYield spin_yield;
   while (Atomic::load_acquire(&_preprocessing_active_workers) > 0) {
     spin_yield.wait();
   }

   // Scavenge
   cached_obj = {nullptr, old_gen_bottom};
   const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words;
   const size_t slice_size_in_words = stripe_size_in_words * n_stripes;
   HeapWord* cur_addr = old_gen_bottom + stripe_index * stripe_size_in_words;
   for (/* empty */; cur_addr < old_gen_top; cur_addr += slice_size_in_words) {
     HeapWord* const stripe_l = cur_addr;
     HeapWord* const stripe_r = MIN2(cur_addr + stripe_size_in_words,
                                     old_gen_top);

     process_range(object_start, pm, stripe_l, stripe_r);
   }
 }

 // This should be called before a scavenge.
 void PSCardTable::verify_all_young_refs_imprecise() {
   CheckForUnmarkedObjects check;

   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   PSOldGen* old_gen = heap->old_gen();

   old_gen->object_iterate(&check);
 }

 // This should be called immediately after a scavenge, before mutators resume.
 void PSCardTable::verify_all_young_refs_precise() {
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   PSOldGen* old_gen = heap->old_gen();

   CheckForPreciseMarks check(heap->young_gen(), this);

   old_gen->oop_iterate(&check);

   verify_all_young_refs_precise_helper(old_gen->object_space()->used_region());
 }

 void PSCardTable::verify_all_young_refs_precise_helper(MemRegion mr) {
   CardValue* bot = byte_for(mr.start());
   CardValue* top = byte_for(mr.end());
   while (bot <= top) {
     assert(*bot == clean_card || *bot == verify_card, "Found unwanted or unknown card mark");
     if (*bot == verify_card)
       *bot = youngergen_card;
     bot++;
   }
 }

 bool PSCardTable::addr_is_marked_imprecise(void *addr) {
   CardValue* p = byte_for(addr);
   CardValue val = *p;

   if (card_is_dirty(val))
     return true;

   if (card_is_newgen(val))
     return true;

   if (card_is_clean(val))
     return false;

   assert(false, "Found unhandled card mark type");

   return false;
 }

 // Also includes verify_card
 bool PSCardTable::addr_is_marked_precise(void *addr) {
   CardValue* p = byte_for(addr);
   CardValue val = *p;

   if (card_is_newgen(val))
     return true;

   if (card_is_verify(val))
     return true;

   if (card_is_clean(val))
     return false;

   if (card_is_dirty(val))
     return false;

   assert(false, "Found unhandled card mark type");

   return false;
 }

 bool PSCardTable::is_in_young(const void* p) const {
   return ParallelScavengeHeap::heap()->is_in_young(p);
 }
	/*
	* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*
	*/

	#include "precompiled.hpp"
	#include "gc/parallel/objectStartArray.inline.hpp"
	#include "gc/parallel/parallelScavengeHeap.inline.hpp"
	#include "gc/parallel/psCardTable.hpp"
	#include "gc/parallel/psPromotionManager.inline.hpp"
	#include "gc/parallel/psScavenge.inline.hpp"
	#include "gc/parallel/psYoungGen.hpp"
	#include "memory/iterator.inline.hpp"
	#include "oops/access.inline.hpp"
	#include "oops/oop.inline.hpp"
	#include "runtime/prefetch.inline.hpp"
	#include "utilities/spinYield.hpp"
	#include "utilities/align.hpp"

	// Checks an individual oop for missing precise marks. Mark
	// may be either dirty or newgen.
	class CheckForUnmarkedOops : public BasicOopIterateClosure {
	private:
	PSYoungGen* _young_gen;
	PSCardTable* _card_table;
	HeapWord* _unmarked_addr;

	protected:
	template <class T> void do_oop_work(T* p) {
	oop obj = RawAccess<>::oop_load(p);
	if (_young_gen->is_in_reserved(obj) &&
	!_card_table->addr_is_marked_imprecise(p)) {
	// Don't overwrite the first missing card mark
	if (_unmarked_addr == nullptr) {
	_unmarked_addr = (HeapWord*)p;
	}
	}
	}

	public:
	CheckForUnmarkedOops(PSYoungGen* young_gen, PSCardTable* card_table) :
	_young_gen(young_gen), _card_table(card_table), _unmarked_addr(nullptr) { }

	virtual void do_oop(oop* p) { CheckForUnmarkedOops::do_oop_work(p); }
	virtual void do_oop(narrowOop* p) { CheckForUnmarkedOops::do_oop_work(p); }

	bool has_unmarked_oop() {
	return _unmarked_addr != nullptr;
	}
	};

	// Checks all objects for the existence of some type of mark,
	// precise or imprecise, dirty or newgen.
	class CheckForUnmarkedObjects : public ObjectClosure {
	private:
	PSYoungGen* _young_gen;
	PSCardTable* _card_table;

	public:
	CheckForUnmarkedObjects() {
	ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
	_young_gen = heap->young_gen();
	_card_table = heap->card_table();
	}

	// Card marks are not precise. The current system can leave us with
	// a mismatch of precise marks and beginning of object marks. This means
	// we test for missing precise marks first. If any are found, we don't
	// fail unless the object head is also unmarked.
	virtual void do_object(oop obj) {
	CheckForUnmarkedOops object_check(_young_gen, _card_table);
	obj->oop_iterate(&object_check);
	if (object_check.has_unmarked_oop()) {
	guarantee(_card_table->addr_is_marked_imprecise(obj), "Found unmarked young_gen object");
	}
	}
	};

	// Checks for precise marking of oops as newgen.
	class CheckForPreciseMarks : public BasicOopIterateClosure {
	private:
	PSYoungGen* _young_gen;
	PSCardTable* _card_table;

	protected:
	template <class T> void do_oop_work(T* p) {
	oop obj = RawAccess<IS_NOT_NULL>::oop_load(p);
	if (_young_gen->is_in_reserved(obj)) {
	assert(_card_table->addr_is_marked_precise(p), "Found unmarked precise oop");
	_card_table->set_card_newgen(p);
	}
	}

	public:
	CheckForPreciseMarks(PSYoungGen* young_gen, PSCardTable* card_table) :
	_young_gen(young_gen), _card_table(card_table) { }

	virtual void do_oop(oop* p) { CheckForPreciseMarks::do_oop_work(p); }
	virtual void do_oop(narrowOop* p) { CheckForPreciseMarks::do_oop_work(p); }
	};

	static void prefetch_write(void *p) {
	if (PrefetchScanIntervalInBytes >= 0) {
	Prefetch::write(p, PrefetchScanIntervalInBytes);
	}
	}

	void PSCardTable::scan_obj_with_limit(PSPromotionManager* pm,
	oop obj,
	HeapWord* start,
	HeapWord* end) {
	if (!obj->is_typeArray()) {
	prefetch_write(start);
	pm->push_contents_bounded(obj, start, end);
	}
	}

	void PSCardTable::pre_scavenge(HeapWord* old_gen_bottom, uint active_workers) {
	_preprocessing_active_workers = active_workers;
	}

	// The "shadow" table is a copy of the card table entries of the current stripe.
	// It is used to separate card reading, clearing and redirtying which reduces
	// complexity significantly.
	class PSStripeShadowCardTable {
	typedef CardTable::CardValue CardValue;

	const uint _card_shift;
	const uint _card_size;
	CardValue _table[PSCardTable::num_cards_in_stripe];
	const CardValue* _table_base;

	public:
	PSStripeShadowCardTable(PSCardTable* pst, HeapWord* const start, HeapWord* const end) :
	_card_shift(CardTable::card_shift()),
	_card_size(CardTable::card_size()),
	_table_base(_table - (uintptr_t(start) >> _card_shift)) {
	size_t stripe_byte_size = pointer_delta(end, start) * HeapWordSize;
	size_t copy_length = align_up(stripe_byte_size, _card_size) >> _card_shift;
	// The end of the last stripe may not be card aligned as it is equal to old
	// gen top at scavenge start. We should not clear the card containing old gen
	// top if not card aligned because there can be promoted objects on that
	// same card. If it was marked dirty because of the promoted objects and we
	// cleared it, we would loose a card mark.
	size_t clear_length = align_down(stripe_byte_size, _card_size) >> _card_shift;
	CardValue* stripe_start_card = pst->byte_for(start);
	memcpy(_table, stripe_start_card, copy_length);
	memset(stripe_start_card, CardTable::clean_card_val(), clear_length);
	}

	HeapWord* addr_for(const CardValue* const card) {
	assert(card >= _table && card <= &_table[PSCardTable::num_cards_in_stripe], "out of bounds");
	return (HeapWord*) ((card - _table_base) << _card_shift);
	}

	const CardValue* card_for(HeapWord* addr) {
	return &_table_base[uintptr_t(addr) >> _card_shift];
	}

	bool is_dirty(const CardValue* const card) {
	return !is_clean(card);
	}

	bool is_clean(const CardValue* const card) {
	assert(card >= _table && card < &_table[PSCardTable::num_cards_in_stripe], "out of bounds");
	return *card == PSCardTable::clean_card_val();
	}

	const CardValue* find_first_dirty_card(const CardValue* const start,
	const CardValue* const end) {
	for (const CardValue* i = start; i < end; ++i) {
	if (is_dirty(i)) {
	return i;
	}
	}
	return end;
	}

	const CardValue* find_first_clean_card(const CardValue* const start,
	const CardValue* const end) {
	for (const CardValue* i = start; i < end; ++i) {
	if (is_clean(i)) {
	return i;
	}
	}
	return end;
	}
	};

	template <typename Func>
	void PSCardTable::process_range(Func&& object_start,
	PSPromotionManager* pm,
	HeapWord* const start,
	HeapWord* const end) {
	assert(start < end, "precondition");
	assert(is_card_aligned(start), "precondition");

	PSStripeShadowCardTable sct(this, start, end);

	// end might not be card-aligned.
	const CardValue* end_card = sct.card_for(end - 1) + 1;

	for (HeapWord* i_addr = start; i_addr < end; /* empty */) {
	const CardValue* dirty_l = sct.find_first_dirty_card(sct.card_for(i_addr), end_card);
	const CardValue* dirty_r = sct.find_first_clean_card(dirty_l, end_card);

	assert(dirty_l <= dirty_r, "inv");

	if (dirty_l == dirty_r) {
	assert(dirty_r == end_card, "inv");
	break;
	}

	// Located a non-empty dirty chunk [dirty_l, dirty_r).
	HeapWord* addr_l = sct.addr_for(dirty_l);
	HeapWord* addr_r = MIN2(sct.addr_for(dirty_r), end);

	// Scan objects overlapping [addr_l, addr_r) limited to [start, end).
	HeapWord* obj_addr = object_start(addr_l);

	while (true) {
	assert(obj_addr < addr_r, "inv");

	oop obj = cast_to_oop(obj_addr);
	const bool is_obj_array = obj->is_objArray();
	HeapWord* const obj_end_addr = obj_addr + obj->size();

	if (is_obj_array) {
	// Always scan obj arrays precisely (they are always marked precisely)
	// to avoid unnecessary work.
	scan_obj_with_limit(pm, obj, addr_l, addr_r);
	} else {
	if (obj_addr < i_addr && i_addr > start) {
	// Already scanned this object. Has been one that spans multiple dirty chunks.
	// The second condition makes sure objects reaching in the stripe are scanned once.
	} else {
	scan_obj_with_limit(pm, obj, addr_l, end);
	}
	}

	if (obj_end_addr >= addr_r) {
	i_addr = is_obj_array ? addr_r : obj_end_addr;
	break;
	}

	// Move to next obj inside this dirty chunk.
	obj_addr = obj_end_addr;
	}

	// Finished a dirty chunk.
	pm->drain_stacks_cond_depth();
	}
	}

	template <typename Func>
	void PSCardTable::preprocess_card_table_parallel(Func&& object_start,
	HeapWord* old_gen_bottom,
	HeapWord* old_gen_top,
	uint stripe_index,
	uint n_stripes) {
	const size_t num_cards_in_slice = num_cards_in_stripe * n_stripes;
	CardValue* cur_card = byte_for(old_gen_bottom) + stripe_index * num_cards_in_stripe;
	CardValue* const end_card = byte_for(old_gen_top - 1) + 1;

	for (/* empty */; cur_card < end_card; cur_card += num_cards_in_slice) {
	HeapWord* stripe_addr = addr_for(cur_card);
	if (is_dirty(cur_card)) {
	// The first card of this stripe is already dirty, no need to see if the
	// reaching-in object is a potentially imprecisely marked non-array
	// object.
	continue;
	}
	HeapWord* first_obj_addr = object_start(stripe_addr);
	if (first_obj_addr == stripe_addr) {
	// No object reaching into this stripe.
	continue;
	}
	oop first_obj = cast_to_oop(first_obj_addr);
	if (!first_obj->is_array() && is_dirty(byte_for(first_obj_addr))) {
	// Found a non-array object reaching into the stripe that has
	// potentially been marked imprecisely. Mark first card of the stripe
	// dirty so it will be processed later.
	*cur_card = dirty_card_val();
	}
	}
	}

	// We get passed the space_top value to prevent us from traversing into
	// the old_gen promotion labs, which cannot be safely parsed.

	// Do not call this method if the space is empty.
	// It is a waste to start tasks and get here only to
	// do no work. This method is just a no-op if space_top == sp->bottom().

	// The generation (old gen) is divided into slices, which are further
	// subdivided into stripes, with one stripe per GC thread. The size of
	// a stripe is a constant, num_cards_in_stripe.
	//
	// +===============+ slice 0
	// \| stripe 0 \|
	// +---------------+
	// \| stripe 1 \|
	// +---------------+
	// \| stripe 2 \|
	// +---------------+
	// \| stripe 3 \|
	// +===============+ slice 1
	// \| stripe 0 \|
	// +---------------+
	// \| stripe 1 \|
	// +---------------+
	// \| stripe 2 \|
	// +---------------+
	// \| stripe 3 \|
	// +===============+ slice 2
	// ...
	//
	// In this case there are 4 threads, so 4 stripes. A GC thread first works on
	// its stripe within slice 0 and then moves to its stripe in the next slice
	// until it has exceeded the top of the generation. The distance to stripe in
	// the next slice is calculated based on the number of stripes. After finishing
	// stripe 0 in slice 0, the thread finds the stripe 0 in slice 1 by adding
	// slice_size_in_words to the start of stripe 0 in slice 0 to get to the start
	// of stripe 0 in slice 1.

	// Scavenging and accesses to the card table are strictly limited to the stripe.
	// In particular scavenging of an object crossing stripe boundaries is shared
	// among the threads assigned to the stripes it resides on. This reduces
	// complexity and enables shared scanning of large objects.
	// It requires preprocessing of the card table though where imprecise card marks of
	// objects crossing stripe boundaries are propagated to the first card of
	// each stripe covered by the individual object.

	void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
	HeapWord* old_gen_bottom,
	HeapWord* old_gen_top,
	PSPromotionManager* pm,
	uint stripe_index,
	uint n_stripes) {
	// ObjectStartArray queries can be expensive for large objects. We cache known objects.
	struct {
	HeapWord* start_addr;
	HeapWord* end_addr;
	} cached_obj {nullptr, old_gen_bottom};

	// Queries must be monotonic because we don't check addr >= cached_obj.start_addr.
	auto object_start = [&] (HeapWord* addr) {
	if (addr < cached_obj.end_addr) {
	assert(cached_obj.start_addr != nullptr, "inv");
	return cached_obj.start_addr;
	}
	HeapWord* result = start_array->object_start(addr);

	cached_obj.start_addr = result;
	cached_obj.end_addr = result + cast_to_oop(result)->size();

	return result;
	};

	// Prepare scavenge.
	preprocess_card_table_parallel(object_start, old_gen_bottom, old_gen_top, stripe_index, n_stripes);

	// Sync with other workers.
	Atomic::dec(&_preprocessing_active_workers);
	SpinYield spin_yield;
	while (Atomic::load_acquire(&_preprocessing_active_workers) > 0) {
	spin_yield.wait();
	}

	// Scavenge
	cached_obj = {nullptr, old_gen_bottom};
	const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words;
	const size_t slice_size_in_words = stripe_size_in_words * n_stripes;
	HeapWord* cur_addr = old_gen_bottom + stripe_index * stripe_size_in_words;
	for (/* empty */; cur_addr < old_gen_top; cur_addr += slice_size_in_words) {
	HeapWord* const stripe_l = cur_addr;
	HeapWord* const stripe_r = MIN2(cur_addr + stripe_size_in_words,
	old_gen_top);

	process_range(object_start, pm, stripe_l, stripe_r);
	}
	}

	// This should be called before a scavenge.
	void PSCardTable::verify_all_young_refs_imprecise() {
	CheckForUnmarkedObjects check;

	ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
	PSOldGen* old_gen = heap->old_gen();

	old_gen->object_iterate(&check);
	}

	// This should be called immediately after a scavenge, before mutators resume.
	void PSCardTable::verify_all_young_refs_precise() {
	ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
	PSOldGen* old_gen = heap->old_gen();

	CheckForPreciseMarks check(heap->young_gen(), this);

	old_gen->oop_iterate(&check);

	verify_all_young_refs_precise_helper(old_gen->object_space()->used_region());
	}

	void PSCardTable::verify_all_young_refs_precise_helper(MemRegion mr) {
	CardValue* bot = byte_for(mr.start());
	CardValue* top = byte_for(mr.end());
	while (bot <= top) {
	assert(bot == clean_card \|\| bot == verify_card, "Found unwanted or unknown card mark");
	if (*bot == verify_card)
	*bot = youngergen_card;
	bot++;
	}
	}

	bool PSCardTable::addr_is_marked_imprecise(void *addr) {
	CardValue* p = byte_for(addr);
	CardValue val = *p;

	if (card_is_dirty(val))
	return true;

	if (card_is_newgen(val))
	return true;

	if (card_is_clean(val))
	return false;

	assert(false, "Found unhandled card mark type");

	return false;
	}

	// Also includes verify_card
	bool PSCardTable::addr_is_marked_precise(void *addr) {
	CardValue* p = byte_for(addr);
	CardValue val = *p;

	if (card_is_newgen(val))
	return true;

	if (card_is_verify(val))
	return true;

	if (card_is_clean(val))
	return false;

	if (card_is_dirty(val))
	return false;

	assert(false, "Found unhandled card mark type");

	return false;
	}

	bool PSCardTable::is_in_young(const void* p) const {
	return ParallelScavengeHeap::heap()->is_in_young(p);
	}