| #pragma once |
| |
| #include <c10/macros/Export.h> |
| #include <c10/util/flat_hash_map.h> |
| #include <cstddef> |
| #include <cstdint> |
| #include <memory> |
| #include <vector> |
| |
| namespace c10 { |
| |
| /* |
| * Given a sequence of allocations in a thread, AllocationPlan records |
| * 1. size of each allocation |
| * 2. Lifetime of each allocation. |
| * 3. allocation offsets: Memory offset for each allocation in a single blob of |
| * memory |
| * 4. Total size of a blob of memory required to satisfy all the allocations. |
| */ |
| class C10_API AllocationPlan { |
| private: |
| // Records size of each allocation by their sequential allocation ids. |
| std::vector<uint64_t> allocation_sizes; |
| // This maps one allocation id (X) to another allocation id (Y). |
| // Allocation X is alive until allocation Y. From allocation Y onwards |
| // allocation X is not referenced. |
| // Thus Y is the id of the first allocation after X is freed. |
| // NB: When an allocation is recorded, along with recording its size, |
| // we also set the lifetime to be numeric_limits::max() |
| // This is to track allocations that are made during the scope of |
| // profiling but were not freed until after the scope ended. |
| // Such allocations are not managed by profiling allocator. |
| std::vector<uint64_t> allocation_lifetimes; |
| // Maps an allocation to some offset in a blob of memory. |
| std::vector<uint64_t> allocation_offsets; |
| uint64_t total_size{0}; |
| void clear(); |
| friend class AllocationPlanner; |
| friend class CPUProfilingAllocator; |
| }; |
| |
| /* |
| * Map of memory ptr to allocation id. This is auxiliary information only |
| * used to establish lifetime of allocations. |
| */ |
| class C10_API AllocationPlanner { |
| private: |
| AllocationPlan* allocation_plan_{nullptr}; |
| // Maps allocated ptr to its allocation id. |
| // This is used when freeing the memory to look up the allocation id |
| // in order to establish the lifetime of a particular allocation. |
| ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_; |
| uint64_t allocation_id_{0}; |
| bool validation_mode_{false}; |
| |
| bool validate_allocation(const uint64_t size, const void* ptr); |
| bool validate_free(const void* ptr); |
| |
| public: |
| bool validation_success{true}; |
| |
| AllocationPlanner() = delete; |
| AllocationPlanner(AllocationPlan* plan, bool validate = false) |
| : allocation_plan_(plan), validation_mode_(validate) {} |
| void record_allocation(const uint64_t size, const void* ptr); |
| void record_free(const void* ptr); |
| void formulate_plan(); |
| void clear(); |
| }; |
| |
| // NOT THREAD SAFE profiling allocator. |
| class C10_API CPUProfilingAllocator { |
| private: |
| const AllocationPlan* plan_{nullptr}; |
| uint64_t allocation_id_{0}; |
| uint64_t current_size_{0}; |
| void* blob_{nullptr}; |
| ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_; |
| |
| public: |
| ~CPUProfilingAllocator(); |
| void set_plan(const AllocationPlan* plan); |
| void unset_plan(); |
| void* allocate(const size_t bytes); |
| void free(void* const ptr); |
| }; |
| |
| /* |
| * Usage: Profile allocations made by one run of the model. |
| * AllocationPlan plan; |
| * { |
| * WithProfileAllocationGuard profile_guard(&plan); |
| * module.forward(...); |
| * } |
| * plan now contains allocation plan. |
| */ |
| class C10_API WithProfileAllocationsGuard { |
| public: |
| WithProfileAllocationsGuard(AllocationPlan* plan); |
| ~WithProfileAllocationsGuard(); |
| |
| private: |
| std::unique_ptr<AllocationPlanner> planner_; |
| }; |
| |
| /* |
| * Usage: Validate allocation plan made with WithProfileAllocationGuard |
| * bool plan_validation_success, success = true; |
| * for (some number of representative inputs) |
| * { |
| * WithValidateAllocationPlanGuard(&plan, &plan_validation_success); |
| * module.forward(...); |
| * success = success && plan_validation_success; |
| * } |
| * success == true means allocations are according to plan |
| * else for some inputs allocation pattern changed. |
| */ |
| class C10_API WithValidateAllocationPlanGuard { |
| public: |
| WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success); |
| ~WithValidateAllocationPlanGuard(); |
| |
| private: |
| std::unique_ptr<AllocationPlanner> planner_; |
| bool* success_; |
| }; |
| |
// Returns a pointer to an AllocationPlanner. Going by the name, this is the
// planner associated with the calling thread.
// NOTE(review): whether the result can be null (e.g. when no profiling or
// validation guard is active) is not visible in this header -- confirm
// against the definition.
| AllocationPlanner* GetThreadLocalAllocationPlanner(); |
| |
| /* |
| * Usage: Allocate tensors accordingly to allocation plan |
| * First make allocation plan. |
| * See WithProfileAllocationsGuard usage. |
| * Second validate allocation plan. |
| * See WithValidateAllocationPlanGuard usage. |
| * CPUProfilingAllocator profiling_allocator; |
| * { |
| * WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan); |
| * module.forward(...); |
| * } |
| */ |
| class C10_API WithProfilingAllocatorGuard { |
| public: |
| WithProfilingAllocatorGuard( |
| CPUProfilingAllocator* allocator, |
| const AllocationPlan* plan); |
| ~WithProfilingAllocatorGuard(); |
| }; |
| |
// Returns a pointer to a CPUProfilingAllocator. Going by the name, this is the
// profiling allocator associated with the calling thread.
// NOTE(review): whether the result can be null (e.g. outside of a
// WithProfilingAllocatorGuard scope) is not visible in this header -- confirm
// against the definition.
| CPUProfilingAllocator* GetThreadLocalProfilingAllocator(); |
| |
| } // namespace c10 |