/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TNT_UTILS_PROFILER_H
#define TNT_UTILS_PROFILER_H

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <array>
#include <chrono>

#if defined(__linux__)
#   include <unistd.h>
#   include <sys/ioctl.h>
#   include <linux/perf_event.h>
#endif

namespace utils {

// Access to a group of hardware performance counters.
//
// On Linux the counters are driven with ioctl()/read() on the file descriptors stored in
// mCountersFd; on every other platform the control methods are no-ops and readCounters()
// produces an all-zero result. The constructor is private and the class is neither copyable
// nor movable: the only way to reach an instance is get().
class Profiler {
    // Index of each event inside mIds, mCountersFd and Counters::counters.
    enum {
        // must be zero: every group-wide ioctl()/read() and isValid() use slot 0's descriptor
        INSTRUCTIONS    = 0,
        CPU_CYCLES      = 1,
        DCACHE_REFS     = 2,
        DCACHE_MISSES   = 3,
        BRANCHES        = 4,
        BRANCH_MISSES   = 5,
        ICACHE_REFS     = 6,
        ICACHE_MISSES   = 7,

        // Must be last one
        EVENT_COUNT
    };

public:

    // Bit flags for resetEvents() / getEnabledEvents(); each bit is (1 << event index).
    // There is no flag for INSTRUCTIONS.
    enum {
        EV_CPU_CYCLES = 1 << CPU_CYCLES,
        EV_L1D_REFS   = 1 << DCACHE_REFS,
        EV_L1D_MISSES = 1 << DCACHE_MISSES,
        EV_BPU_REFS   = 1 << BRANCHES,
        EV_BPU_MISSES = 1 << BRANCH_MISSES,
        EV_L1I_REFS   = 1 << ICACHE_REFS,
        EV_L1I_MISSES = 1 << ICACHE_MISSES,
        // helpers: both counters needed to compute the corresponding hit/miss rate
        EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
        EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
        EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
    };

    // Returns the Profiler instance (defined out of line).
    static Profiler& get() noexcept;

    Profiler(const Profiler& rhs) = delete;
    Profiler(Profiler&& rhs) = delete;
    Profiler& operator=(const Profiler& rhs) = delete;
    Profiler& operator=(Profiler&& rhs) = delete;

    // Selects which events are enabled; eventMask is a combination of the EV_* flags.
    // By default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
    // NOTE(review): the return value is presumably the mask of events that could actually be
    // enabled -- confirm against the out-of-line definition.
    uint32_t resetEvents(uint32_t eventMask) noexcept;

    // Returns the currently stored event mask (combination of EV_* flags).
    uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }

    // could return false if performance counters are not supported/enabled
    bool isValid() const noexcept { return mCountersFd[0] >= 0; }

    // Snapshot of all counters, or the difference between two snapshots (see operator-).
    //
    // A default-constructed Counters is all zeros. The member order must not change: on Linux
    // readCounters() read()s the descriptor's data directly into an object of this type.
    // NOTE(review): the layout looks like perf's group read format (nr, time_enabled,
    // time_running, then {value, id} pairs) -- confirm against the perf_event_open() setup.
    class Counters {
        friend class Profiler;
        uint64_t nr = 0;
        uint64_t time_enabled = 0;
        uint64_t time_running = 0;
        struct {
            uint64_t value;
            uint64_t id;
        } counters[Profiler::EVENT_COUNT] = {};

        // Field-wise difference lhs - rhs (unsigned arithmetic, so it wraps if rhs > lhs).
        // Only the values are subtracted; each entry keeps the id it had in lhs.
        friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
            lhs.nr -= rhs.nr;
            lhs.time_enabled -= rhs.time_enabled;
            lhs.time_running -= rhs.time_running;
            for (size_t i = 0; i < size_t(EVENT_COUNT); ++i) {
                lhs.counters[i].value -= rhs.counters[i].value;
            }
            return lhs;
        }

    public:
        // Raw counter values. A counter that was not read stays at zero.
        uint64_t getInstructions() const noexcept       { return counters[INSTRUCTIONS].value; }
        uint64_t getCpuCycles() const noexcept          { return counters[CPU_CYCLES].value; }
        uint64_t getL1DReferences() const noexcept      { return counters[DCACHE_REFS].value; }
        uint64_t getL1DMisses() const noexcept          { return counters[DCACHE_MISSES].value; }
        uint64_t getL1IReferences() const noexcept      { return counters[ICACHE_REFS].value; }
        uint64_t getL1IMisses() const noexcept          { return counters[ICACHE_MISSES].value; }
        uint64_t getBranchInstructions() const noexcept { return counters[BRANCHES].value; }
        uint64_t getBranchMisses() const noexcept       { return counters[BRANCH_MISSES].value; }

        // time_enabled expressed as a nanosecond duration.
        std::chrono::duration<uint64_t, std::nano> getWallTime() const {
            return std::chrono::duration<uint64_t, std::nano>(time_enabled);
        }

        // time_running expressed as a nanosecond duration.
        std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
            return std::chrono::duration<uint64_t, std::nano>(time_running);
        }

        // The ratios below are plain floating-point divisions: when the denominator counter is
        // zero (e.g. counters unavailable) the result is NaN or infinity, not 0.

        // Instructions per cycle.
        double getIPC() const noexcept {
            return double(getInstructions()) / double(getCpuCycles());
        }

        // Cycles per instruction.
        double getCPI() const noexcept {
            return double(getCpuCycles()) / double(getInstructions());
        }

        // L1 data cache misses divided by L1 data cache references.
        double getL1DMissRate() const noexcept {
            return double(getL1DMisses()) / double(getL1DReferences());
        }

        double getL1DHitRate() const noexcept {
            return 1.0 - getL1DMissRate();
        }

        // L1 instruction cache misses divided by L1 instruction cache references.
        double getL1IMissRate() const noexcept {
            return double(getL1IMisses()) / double(getL1IReferences());
        }

        double getL1IHitRate() const noexcept {
            return 1.0 - getL1IMissRate();
        }

        // Branch misses divided by branch instructions.
        double getBranchMissRate() const noexcept {
            return double(getBranchMisses()) / double(getBranchInstructions());
        }

        double getBranchHitRate() const noexcept {
            return 1.0 - getBranchMissRate();
        }

        // Misses per thousand instructions, for a caller-supplied miss count.
        double getMPKI(uint64_t misses) const noexcept {
            return (misses * 1000.0) / getInstructions();
        }

    };

#if defined(__linux__)

    // The three controls below act on the whole group through slot 0's descriptor. They are
    // best effort: the ioctl() result is ignored, so they do nothing useful when !isValid().

    // Resets the counters.
    void reset() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_RESET,  PERF_IOC_FLAG_GROUP);
    }

    // Enables (starts) the counters.
    void start() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
    }

    // Disables (stops) the counters.
    void stop() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
    }

    // Reads the current counter values into *outCounters.
    // *outCounters is always reset to zero first and stays all-zero if the read fails; events
    // without a valid descriptor are left at zero. A null outCounters is ignored.
    void readCounters(Counters* outCounters) noexcept {
        if (outCounters == nullptr) {
            return;
        }
        *outCounters = Counters{};

        // Zero-initialized, so a short read cannot expose indeterminate data.
        Counters raw;
        const ssize_t n = read(mCountersFd[0], &raw, sizeof(Counters));
        if (n <= 0) {
            return;
        }

        outCounters->nr = raw.nr;
        outCounters->time_enabled = raw.time_enabled;
        outCounters->time_running = raw.time_running;
        for (size_t i = 0; i < size_t(EVENT_COUNT); i++) {
            // mIds[i] is the position of event i inside the data returned by read().
            const size_t slot = mIds[i];
            if (mCountersFd[i] >= 0 && slot < size_t(EVENT_COUNT)) {
                outCounters->counters[i] = raw.counters[slot];
            }
        }
    }

#else // !__linux__

    // No performance counters on this platform: the controls do nothing.
    void reset() noexcept { }
    void start() noexcept { }
    void stop() noexcept { }

    // Produces an all-zero result, matching what the Linux version reports when nothing can
    // be read. A null outCounters is ignored.
    void readCounters(Counters* outCounters) noexcept {
        if (outCounters != nullptr) {
            *outCounters = Counters{};
        }
    }

#endif // __linux__

    // True when both branch counters have a valid descriptor.
    bool hasBranchRates() const noexcept {
        return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
    }

    // True when both L1 instruction cache counters have a valid descriptor.
    bool hasICacheRates() const noexcept {
        return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
    }

private:
    Profiler() noexcept;
    ~Profiler() noexcept;

    // For each event, its index inside the data returned by read() (see readCounters()).
    std::array<uint8_t, EVENT_COUNT> mIds;
    // For each event, its descriptor; a negative value means the event is unavailable.
    std::array<int, EVENT_COUNT> mCountersFd;
    uint32_t mEnabledEvents = 0;
};

} // namespace utils

#endif // TNT_UTILS_PROFILER_H
