// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2022 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License.  You may obtain a copy of the License at
//
//     https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Aleksei Vetrov

#include "dwarf_wrappers.h"

#include <dwarf.h>
#include <elfutils/libdw.h>

#include <cstddef>
#include <cstdint>
#include <optional>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

#include "error.h"
#include "hex.h"

namespace stg {
namespace dwarf {

// Streams a Location. The value is always written through Hex; a TLS
// location is additionally prefixed with "TLS:".
std::ostream& operator<<(std::ostream& os, const Location& location) {
  // Only the kind-specific prefix is emitted inside the switch, so the single
  // return below is reached on every path and control can never run off the
  // end of this non-void function. There is deliberately no `default` label,
  // so compilers can still diagnose an unhandled enumerator.
  switch (location.kind) {
    case Location::Kind::ADDRESS:
      break;
    case Location::Kind::TLS:
      os << "TLS:";
      break;
  }
  return os << Hex(location.value);
}

namespace {

// Return code that this file treats as success when calling libdw functions.
constexpr int kReturnOk = 0;
// Return code that this file treats as "nothing (more) to return" from
// dwarf_next_unit, dwarf_child and dwarf_siblingof.
constexpr int kReturnNoEntry = 1;

// Looks up `attribute` on `die` and returns it, or std::nullopt when the
// lookup does not produce an attribute.
//
// The lookup uses dwarf_attr_integrate, which automatically resolves
// DW_AT_abstract_origin and DW_AT_specification references and fetches the
// attribute from the linked DIE. libdw has infinite loop protection, as it
// stops after 16 dereferences.
// TODO: don't use dwarf_attr_integrate by default
std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
                                            uint32_t attribute) {
  Dwarf_Attribute found{};
  if (dwarf_attr_integrate(die, attribute, &found) == nullptr) {
    return std::nullopt;
  }
  return found;
}

// Looks up `attribute` on `die` itself via dwarf_attr, i.e. without following
// DW_AT_specification and DW_AT_abstract_origin references. Returns
// std::nullopt when the lookup does not produce an attribute.
std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
                                                  uint32_t attribute) {
  Dwarf_Attribute found{};
  if (dwarf_attr(die, attribute, &found) == nullptr) {
    return std::nullopt;
  }
  return found;
}

// Extracts a non-negative constant from a single DWARF expression operation.
//
// Returns:
//   * operand.number for DW_OP_addr and the unsigned constant operations;
//   * operand.number for the signed constant operations, unless the value is
//     negative when reinterpreted as int64_t (then std::nullopt);
//   * the distance from DW_OP_lit0 for DW_OP_lit0 .. DW_OP_lit31;
//   * std::nullopt for every other operation.
std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
  switch (operand.atom) {
    case DW_OP_addr:
    case DW_OP_const1u:
    case DW_OP_const2u:
    case DW_OP_const4u:
    case DW_OP_const8u:
    case DW_OP_constu:
      return operand.number;
    case DW_OP_const1s:
    case DW_OP_const2s:
    case DW_OP_const4s:
    case DW_OP_const8s:
    case DW_OP_consts:
      if (static_cast<int64_t>(operand.number) < 0) {
        // Atom is not an unsigned constant
        return std::nullopt;
      }
      return operand.number;
    default:
      break;
  }
  // The literal operations carry their value in the opcode itself. This is a
  // plain range comparison rather than a `case A ... B:` label because case
  // ranges are a GNU extension and not standard C++.
  if (operand.atom >= DW_OP_lit0 && operand.atom <= DW_OP_lit31) {
    return static_cast<uint64_t>(operand.atom - DW_OP_lit0);
  }
  return std::nullopt;
}

// A pointer/length pair describing a sequence of Dwarf_Op operations.
// A default-constructed Expression is empty (null pointer, zero length).
struct Expression {
  Dwarf_Op* atoms = nullptr;
  size_t length = 0;

  // Returns the i-th operation; the index is not range-checked.
  const Dwarf_Op& operator[](size_t i) const { return *(atoms + i); }
};

// Decodes `attribute` as a location expression using dwarf_getlocation.
//
// Returns std::nullopt for a zero-length expression, otherwise the decoded
// operations. A dwarf_getlocation error, or a non-empty result with a null
// operation pointer, is reported through Check.
std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) {
  Expression expression;
  const int status =
      dwarf_getlocation(&attribute, &expression.atoms, &expression.length);
  Check(status == kReturnOk) << "dwarf_getlocation returned error";

  if (expression.length != 0) {
    Check(expression.atoms != nullptr)
        << "dwarf_getlocation returned non-empty expression with NULL atoms";
    return expression;
  }
  // If no location attribute is present or it has an empty location
  // description, the variable is present in the source but not in the object
  // code. So a zero length expression is equivalent to no location attribute.
  return std::nullopt;
}

}  // namespace

// Walks all units of `dwarf` with dwarf_next_unit and returns one
// CompilationUnit per unit, holding the unit's version and the DIE found
// right after the unit header.
//
// Any libdw error is reported through Check.
std::vector<CompilationUnit> GetCompilationUnits(Dwarf& dwarf) {
  std::vector<CompilationUnit> units;
  for (Dwarf_Off unit_offset = 0;;) {
    Dwarf_Off following_offset;
    size_t header_size = 0;
    Dwarf_Half version = 0;
    const int status = dwarf_next_unit(&dwarf, unit_offset, &following_offset,
                                       &header_size, &version, nullptr,
                                       nullptr, nullptr, nullptr, nullptr);
    Check(status == kReturnOk || status == kReturnNoEntry)
        << "dwarf_next_unit returned error";
    if (status != kReturnOk) {
      // kReturnNoEntry: there are no more units to visit.
      return units;
    }

    units.push_back({version, {}});
    // The unit's first DIE starts immediately after the unit header.
    Dwarf_Die& unit_die = units.back().entry.die;
    Check(dwarf_offdie(&dwarf, unit_offset + header_size, &unit_die) != nullptr)
        << "dwarf_offdie returned error";

    unit_offset = following_offset;
  }
}

std::vector<Entry> Entry::GetChildren() {
  Entry child;
  int return_code = dwarf_child(&die, &child.die);
  Check(return_code == kReturnOk || return_code == kReturnNoEntry)
      << "dwarf_child returned error";
  std::vector<Entry> result;
  while (return_code == kReturnOk) {
    result.push_back(child);
    return_code = dwarf_siblingof(&child.die, &child.die);
    Check(return_code == kReturnOk || return_code == kReturnNoEntry)
        << "dwarf_siblingof returned error";
  }
  return result;
}

// Returns the value of dwarf_tag for this entry's DIE, unchecked.
int Entry::GetTag() {
  const int tag = dwarf_tag(&die);
  return tag;
}

// Returns the value of dwarf_dieoffset for this entry's DIE, unchecked.
Dwarf_Off Entry::GetOffset() {
  const Dwarf_Off offset = dwarf_dieoffset(&die);
  return offset;
}

std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
  std::optional<std::string> result;
  auto dwarf_attribute = GetAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return result;
  }

  const char* value = dwarf_formstring(&dwarf_attribute.value());
  Check(value != nullptr) << "dwarf_formstring returned error";
  result.emplace(value);
  return result;
}

std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
  std::optional<std::string> result;
  auto dwarf_attribute = GetDirectAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return result;
  }

  const char* value = dwarf_formstring(&dwarf_attribute.value());
  Check(value != nullptr) << "dwarf_formstring returned error";
  result.emplace(value);
  return result;
}

std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
  auto dwarf_attribute = GetAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return {};
  }

  uint64_t value;
  if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
    Die() << "dwarf_formudata returned error";
  }
  return value;
}

// Same as MaybeGetUnsignedConstant, but a missing attribute ends in Die()
// with a message naming this entry's offset, its tag and the attribute.
uint64_t Entry::MustGetUnsignedConstant(uint32_t attribute) {
  const std::optional<uint64_t> constant = MaybeGetUnsignedConstant(attribute);
  if (constant.has_value()) {
    return *constant;
  }
  Die() << "DWARF entry <" << Hex(GetOffset()) << "> with tag " << GetTag()
        << " is missing attribute " << Hex(attribute);
}

bool Entry::GetFlag(uint32_t attribute) {
  bool result = false;
  auto dwarf_attribute = (attribute == DW_AT_declaration)
                             ? GetDirectAttribute(&die, attribute)
                             : GetAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return result;
  }

  Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk)
      << "dwarf_formflag returned error";
  return result;
}

std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
  std::optional<Entry> result;
  auto dwarf_attribute = GetAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return result;
  }

  result.emplace();
  Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die))
      << "dwarf_formref_die returned error";
  return result;
}

namespace {

// Converts a location-expression attribute into a Location.
//
// Returns std::nullopt when MaybeGetExpression yields no expression.
// Otherwise the following are tried in order:
//   1. dwarf_getlocation_attr succeeds -> ADDRESS decoded by dwarf_formaddr;
//   2. [DW_OP_addr]                    -> ADDRESS of the operand;
//   3. [DW_OP_addr, DW_OP_plus_uconst] -> ADDRESS of the sum of both operands;
//   4. two operations, the second being a TLS operation -> TLS with value 0.
// Anything else ends in Die().
std::optional<Location> GetLocationFromExpression(Dwarf_Attribute& attribute) {
  const std::optional<Expression> maybe_ops = MaybeGetExpression(attribute);
  if (!maybe_ops.has_value()) {
    return std::nullopt;
  }
  const Expression& ops = *maybe_ops;

  Dwarf_Attribute address_attribute;
  const int status =
      dwarf_getlocation_attr(&attribute, ops.atoms, &address_attribute);
  if (status == kReturnOk) {
    uint64_t address;
    Check(dwarf_formaddr(&address_attribute, &address) == kReturnOk)
        << "dwarf_formaddr returned error";
    return Location{Location::Kind::ADDRESS, address};
  }

  switch (ops.length) {
    case 1:
      if (ops[0].atom == DW_OP_addr) {
        // DW_OP_addr is unsupported by dwarf_getlocation_attr, so the address
        // has to be extracted from the expression manually.
        return Location{Location::Kind::ADDRESS, ops[0].number};
      }
      break;
    case 2: {
      const Dwarf_Op& first = ops[0];
      const Dwarf_Op& second = ops[1];
      if (first.atom == DW_OP_addr && second.atom == DW_OP_plus_uconst) {
        // A rather odd case seen from Clang.
        return Location{Location::Kind::ADDRESS, first.number + second.number};
      }
      // TLS operation has different encodings in Clang and GCC:
      // * Clang 14 uses DW_OP_GNU_push_tls_address
      // * GCC 12 uses DW_OP_form_tls_address
      if (second.atom == DW_OP_GNU_push_tls_address ||
          second.atom == DW_OP_form_tls_address) {
        // TLS symbols address may be incorrect because of unsupported
        // relocations. Resetting it to zero the same way as it is done in
        // elf::Reader::GetUserspaceSymbols.
        // TODO: match TLS variables by address
        return Location{Location::Kind::TLS, 0};
      }
      break;
    }
    default:
      break;
  }

  Die() << "Unsupported data location expression";
}

}  // namespace

std::optional<Location> Entry::MaybeGetLocation(uint32_t attribute) {
  auto dwarf_attribute = GetAttribute(&die, attribute);
  if (!dwarf_attribute) {
    return {};
  }
  if (attribute == DW_AT_location) {
    return GetLocationFromExpression(*dwarf_attribute);
  }

  uint64_t address;
  Check(dwarf_formaddr(&dwarf_attribute.value(), &address) == kReturnOk)
      << "dwarf_formaddr returned error";
  return Location{Location::Kind::ADDRESS, address};
}

std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
  auto attribute = GetAttribute(&die, DW_AT_data_member_location);
  if (!attribute) {
    return {};
  }

  uint64_t offset;
  // Try to interpret attribute as an unsigned integer constant
  if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
    return offset;
  }

  // Parse location expression
  const auto expression_opt = MaybeGetExpression(attribute.value());
  if (!expression_opt) {
    return {};
  }
  const Expression& expression = *expression_opt;

  // Parse virtual base classes offset, which looks like this:
  //   [0] = DW_OP_dup
  //   [1] = DW_OP_deref
  //   [2] = constant operand
  //   [3] = DW_OP_minus
  //   [4] = DW_OP_deref
  //   [5] = DW_OP_plus
  // This form is not in the standard, but hardcoded in compilers:
  //   * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
  //   * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
  if (expression.length == 6 &&
      expression[0].atom == DW_OP_dup &&
      expression[1].atom == DW_OP_deref &&
      expression[3].atom == DW_OP_minus &&
      expression[4].atom == DW_OP_deref &&
      expression[5].atom == DW_OP_plus) {
    const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
    if (byte_offset) {
      return byte_offset;
    }
  }

  Die() << "Unsupported member offset expression, " << Hex(GetOffset());
}

std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
  auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
  if (!attribute) {
    return {};
  }

  // Parse location expression
  const auto expression_opt = MaybeGetExpression(attribute.value());
  if (!expression_opt) {
    return {};
  }
  const Expression& expression = *expression_opt;

  // We expect compilers to produce expression with one constant operand
  if (expression.length == 1) {
    const auto offset = MaybeGetUnsignedOperand(expression[0]);
    if (offset) {
      return offset;
    }
  }

  Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
}

std::optional<uint64_t> Entry::MaybeGetCount() {
  auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound);
  if (lower_bound_attribute && *lower_bound_attribute != 0) {
    Die() << "Non-zero DW_AT_lower_bound is not supported";
  }
  auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound);
  auto count_attribute = GetAttribute(&die, DW_AT_count);
  if (!upper_bound_attribute && !count_attribute) {
    return {};
  }
  if (upper_bound_attribute && count_attribute) {
    Die() << "Both DW_AT_upper_bound and DW_AT_count given";
  }
  Dwarf_Attribute dwarf_attribute;
  uint64_t addend;
  if (upper_bound_attribute) {
    dwarf_attribute = *upper_bound_attribute;
    addend = 1;
  } else {
    dwarf_attribute = *count_attribute;
    addend = 0;
  }

  uint64_t value;
  if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) {
    return value + addend;
  }

  // Don't fail if attribute is not a constant and treat this as no count
  // provided. This can happen if array has variable length.
  // TODO: implement clean solution for separating "not a
  // constant" errors from other errors.
  return {};
}

// Loads the source file table of `compilation_unit` into files_ and
// files_count_ via dwarf_getsrcfiles; a failure ends in Die().
Files::Files(Entry& compilation_unit) {
  const int status =
      dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_);
  if (status == kReturnOk) {
    return;
  }
  Die() << "No source file information in DWARF";
}

// Resolves the file index stored in `attribute` of `entry` to a file name
// using dwarf_filesrc.
//
// Returns std::nullopt when the entry has no such attribute. An uninitialised
// file table and a null dwarf_filesrc result are reported through Check; an
// index that is not below files_count_ ends in Die().
std::optional<std::string> Files::MaybeGetFile(Entry& entry,
                                               uint32_t attribute) const {
  const std::optional<uint64_t> maybe_index =
      entry.MaybeGetUnsignedConstant(attribute);
  if (!maybe_index.has_value()) {
    return std::nullopt;
  }
  const uint64_t index = *maybe_index;

  Check(files_ != nullptr) << "dwarf::Files was not initialised";
  if (index >= files_count_) {
    Die() << "File index is greater than or equal files count (" << index
          << " >= " << files_count_ << ")";
  }

  const char* const name = dwarf_filesrc(files_, index, nullptr, nullptr);
  Check(name != nullptr) << "dwarf_filesrc returned error";
  return std::string(name);
}

}  // namespace dwarf
}  // namespace stg
