blob: fb7f3f3468e69d7793864834f1d8161c37dcf0db [file]
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "interceptor.h"
#include <dlfcn.h>
#include <unistd.h>
#include <algorithm>
#include <array>
#include <filesystem>
#include <fstream>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <android-base/strings.h>
#include <google/protobuf/util/delimited_message_util.h>
namespace fs = std::filesystem;
// UTILITY function declarations
// Entry point for every intercepted execve() call. If the call refers to a
// program that we might be able to handle, the command is transformed,
// analyzed, logged and executed; otherwise the function simply returns so
// that the caller can fall back to the original execve.
static void process_command(const char* filename, char* const argv[], char* const envp[]);
// Log the command if logging is enabled, i.e. if the command's environment
// contains the ENV_command_log variable naming the log file.
static void log(const interceptor::Command&);
// Execute the (potentially modified) command, passing `envp` on unchanged.
static void exec(const interceptor::Command&, char* const envp[]);
// OVERLOADS for LD_PRELOAD USE
// Intercept execve calls, for that capture the original execve call.
// The lookup uses RTLD_NEXT so that it resolves to the next definition of
// "execve" after this library rather than to the interposing definition in
// this file. The pointer is resolved once, during static initialization.
static auto const old_execve = reinterpret_cast<decltype(execve)*>(dlsym(RTLD_NEXT, "execve"));
extern "C" {
// Interposing definition of execve() with the same signature as the libc one.
//
// The call is first handed to process_command(). That function only comes
// back if it did not take over the call (or if its own exec attempt failed);
// in that case the untouched arguments are forwarded to the original execve
// and its result is returned to the caller.
int execve(const char* filename, char* const argv[], char* const envp[]) {
// pass on to process_command(), if unhandled, fall back to the original
// execve
process_command(filename, argv, envp);
return old_execve(filename, argv, envp);
}
} // extern "C"
// LIBRARY IMPLEMENTATION
namespace interceptor {
// Builds a Command message from the raw arguments of an execve() call.
//
//   program: path of the program to execute; must not be null.
//   argv:    null-terminated argument vector, or null.
//   envp:    null-terminated environment vector ("NAME=value" entries), or
//            null.
//
// The current working directory of the calling process is recorded as the
// command's current_dir. Environment entries without a '=' are ignored.
//
// Linux permits execve() to be called with a null argv and/or envp and treats
// either as an empty list, so both are handled the same way here instead of
// being dereferenced.
static Command instantiate_command(const char* program, char* const argv[], char* const envp[]) {
  Command result;
  result.set_program(program);
  result.set_current_dir(fs::current_path());

  if (argv != nullptr) {
    for (auto current_arg = argv; *current_arg; ++current_arg) {
      result.add_args(*current_arg);
    }
  }

  if (envp != nullptr) {
    auto& env_vars = *result.mutable_env_vars();
    for (auto current_env = envp; *current_env; ++current_env) {
      const std::string s(*current_env);
      const auto pos = s.find('=');
      if (pos == std::string::npos) {
        continue;
      }
      // split "NAME=value" at the first '='; the value may itself contain '='
      env_vars[s.substr(0, pos)] = s.substr(pos + 1);
    }
  }
  return result;
}
// Rewrites the command so that paths below the root directory become relative.
//
// The root directory is taken from the ENV_root_dir variable of the command's
// environment. If that variable is missing or empty, or if no usable relative
// path from the current working directory to the root can be determined, the
// command is left untouched.
//
// Otherwise:
//   - current_dir is replaced by its path relative to the root directory,
//   - every occurrence of the (absolute) root directory in the program and in
//     each argument is replaced by the relative path to the root directory.
static void make_relative(Command* command) {
  // determine the ROOT_DIR
  const auto& env = command->env_vars();
  const auto it = env.find(ENV_root_dir);
  if (it == env.cend() || it->second.empty()) {
    // an empty value must not be indexed below, and cannot be a root anyway
    return;
  }
  std::string root_dir = it->second;
  if (root_dir.back() != '/') root_dir += '/';

  // determine the relative path to ROOT_DIR from the current working dir
  std::string rel_root = fs::relative(root_dir);
  if (rel_root.empty()) {
    // no relative path could be determined; nothing can be made relative
    return;
  }
  if (rel_root.back() != '/') rel_root += '/';
  if (rel_root == "./") rel_root.clear();

  // TODO: This is generally bad as this means we can't make anything relative.
  // This happens if the out dir is outside of the root.
  if (rel_root.find(root_dir) != std::string::npos) {
    return;
  }

  command->set_current_dir(fs::relative(command->current_dir(), root_dir));

  // replacement functor: substitutes every occurrence of root_dir in a single
  // left-to-right pass. The search resumes right after the inserted text so
  // that text produced by a replacement is never rewritten again.
  const auto replace_all = [&](auto& str) {
    for (auto pos = str.find(root_dir); pos != std::string::npos;
         pos = str.find(root_dir, pos + rel_root.length())) {
      str.replace(pos, root_dir.length(), rel_root);
    }
  };

  // now go and replace everything
  replace_all(*command->mutable_program());
  std::for_each(command->mutable_args()->begin(), command->mutable_args()->end(), replace_all);
}
// Writes every element of `vec` to `os` as a quoted string (std::quoted),
// with ", " between consecutive elements. An empty container produces no
// output at all.
template <typename V>
static void dump_vector(std::ostream& os, const V& vec) {
  auto current = std::begin(vec);
  const auto last = std::end(vec);
  if (current == last) {
    return;
  }
  // The first element is written bare; each later one is led by a separator.
  os << std::quoted(*current);
  while (++current != last) {
    os << ", " << std::quoted(*current);
  }
}
// Prints a human readable, single line representation of a command:
//
//   [("in1", "in2") => ("out1")] program arg1 arg2 ...
//
// The program path is printed in place of args[0]; the remaining arguments
// follow, separated by single spaces, with tabs and newlines escaped as "\t"
// and "\n". A command without any arguments prints just the program.
std::ostream& operator<<(std::ostream& os, const interceptor::Command& command) {
  os << "[(";
  dump_vector(os, command.inputs());
  os << ") => (";
  dump_vector(os, command.outputs());
  os << ")] ";

  // TODO: chain output iterators instead and find a common expression
  const static auto escape = [](const std::string& in) {
    return android::base::StringReplace(android::base::StringReplace(in, "\t", "\\t", true),
                                        "\n", "\\n", true);
  };

  os << command.program();

  const auto& args = command.args();
  if (args.empty()) {
    // nothing to skip and nothing to print; stepping past begin() of an empty
    // sequence would walk beyond end()
    return os;
  }
  // args[0] is represented by the program printed above
  for (auto I = std::next(args.cbegin()), E = args.cend(); I != E; ++I) {
    os << ' ' << escape(*I);
  }
  return os;
}
static AnalysisResult analyze_command(const interceptor::Command& command);
// Determines the inputs and outputs of `command` via analyze_command() and
// stores them in the command.
//
// A leading "./" is stripped from every input and output. If any input is not
// a regular file, the offending path and the command are reported on stderr
// and the process exits with status 1.
static void analyze(Command* command) {
  auto [inputs, outputs] = analyze_command(*command);

  // TODO: this sanitizing should be done during make_relative
  const auto strip_dot_slash = [](auto& paths) {
    for (auto& path : paths) {
      if (path.compare(0, 2, "./") == 0) {
        path.erase(0, 2);
      }
    }
  };
  strip_dot_slash(inputs);
  strip_dot_slash(outputs);

  // Locate the first input that does not exist as a regular file.
  const auto missing = std::find_if(inputs.cbegin(), inputs.cend(), [](const auto& candidate) {
    return !fs::is_regular_file(candidate);
  });
  if (missing != inputs.cend()) {
    std::cerr << "missing input: " << *missing << "\n" << *command << "\n";
    exit(1);
  }

  *command->mutable_inputs() = {inputs.cbegin(), inputs.cend()};
  *command->mutable_outputs() = {outputs.cbegin(), outputs.cend()};
}
/// COMMAND ANALYSIS
// An Analyzer inspects one command and reports its inputs and outputs as an
// AnalysisResult. It is invoked by analyze_command() with the command's
// program, its argument vector and its environment, in that order.
using Analyzer = std::function<AnalysisResult(const std::string&, const ArgVec&, const EnvMap&)>;
// Analyzer for compiler and linker style command lines.
//
//   args: the full argument vector; args[0] is the program itself and is
//         skipped. An empty vector yields an empty result.
//
// Classification of the remaining arguments:
//   - the argument following "-o" is an output,
//   - "-Wp,-MMD,<file>" contributes <file> as an output,
//   - the argument following any of kSkipNextArgs is ignored,
//   - any other argument starting with '-' is a flag and ignored,
//   - everything else is an input.
// If "/dev/null" or "-" shows up as a regular argument, the command is
// considered a test compilation and an empty result is returned.
static AnalysisResult analyze_compiler_linker(const std::string&, const ArgVec& args,
                                              const EnvMap&) {
  // flags whose value is passed as a separate, subsequent argument
  static constexpr std::array kSkipNextArgs{
      "-isystem", "-I", "-L", "-m", "-soname", "-z",
  };
  static constexpr std::string_view kOutputOption = "-Wp,-MMD,";

  AnalysisResult result;
  if (args.empty()) {
    // no program entry to skip; stepping past begin() would be out of range
    return result;
  }

  bool next_is_out = false;
  bool skip_next = false;
  // skip args[0] as this is the program itself
  for (auto it = std::next(args.cbegin()); it != args.cend(); ++it) {
    const auto& arg = *it;
    if (arg == "-o") {
      next_is_out = true;
      continue;
    }
    if (next_is_out) {
      result.outputs.push_back(arg);
      next_is_out = false;
      continue;
    }
    if (arg.rfind(kOutputOption, 0) == 0) {
      // no `continue` needed: the argument starts with '-' and is therefore
      // dropped by the flag check below
      result.outputs.push_back(arg.substr(kOutputOption.size()));
    }
    if (skip_next) {
      skip_next = false;
      continue;
    }
    if (std::find(kSkipNextArgs.cbegin(), kSkipNextArgs.cend(), arg) != kSkipNextArgs.cend()) {
      skip_next = true;
    }
    // ignore test compilations
    if (arg == "/dev/null" || arg == "-") {
      return {};
    }
    if (arg[0] == '-') {  // ignore flags
      continue;
    }
    result.inputs.push_back(arg);
  }
  return result;
}
// Analyzer for archiver style command lines of the shape
//
//   <archiver> <flags> <archive> [<member>...]
//
// args[2] is reported as the single output and every argument after it as an
// input. Command lines with fewer than three arguments yield an empty result.
static AnalysisResult analyze_archiver(const std::string&, const ArgVec& args, const EnvMap&) {
  AnalysisResult result;
  if (args.size() >= 3) {
    // args[0] is the program and args[1] the archiver flags; neither is a file
    result.outputs.push_back(args[2]);
    std::copy(args.cbegin() + 3, args.cend(), std::back_inserter(result.inputs));
  }
  return result;
}
// Table of known tools: each entry pairs a regular expression with the
// Analyzer responsible for commands matching it. analyze_command() matches
// the expressions against args[0] in table order and uses the first hit.
// Both expressions accept an optional leading directory part ("(.*/)?").
static const std::initializer_list<std::pair<std::regex, Analyzer>> analyzers{
{
// compilers, linkers and llvm-strip
std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"),
analyze_compiler_linker,
},
{
// archivers: ar and llvm-ar
std::regex("^(.*/)?(llvm-)?ar$"),
analyze_archiver,
},
};
// Selects the analyzer responsible for `command` and returns its result.
//
// The analyzer is chosen by matching args[0] against the regular expressions
// in `analyzers`; the first match wins. An empty AnalysisResult is returned if
// no analyzer matches or if the command has no arguments at all (execve may
// legitimately be called with an empty argument vector).
static AnalysisResult analyze_command(const Command& command) {
  const auto& args = command.args();
  if (args.empty()) {
    // there is no args[0] to match against
    return {};
  }
  const auto& invoked_as = args[0];
  for (const auto& [regex, analyzer] : analyzers) {
    if (std::regex_match(invoked_as, regex)) {
      return analyzer(command.program(), args, command.env_vars());
    }
  }
  return {};
}
} // namespace interceptor
/// UTILITY FUNCTIONS
static void process_command(const char* filename, char* const argv[], char* const envp[]) {
// First, try to find out whether we at all can handle this command. If not,
// simply return and fall back to the original handler.
if (!fs::is_regular_file(filename)) {
return;
}
// Ok, we can handle that one, let's transform it.
auto command = interceptor::instantiate_command(filename, argv, envp);
// rewrite all command line arguments (including the program itself) to use
// paths relative to ROOT_DIR. This is essential for reproducible builds and
// furthermore necessary to produce cache hits in RBE.
make_relative(&command);
analyze(&command);
log(command);
// pass down the transformed command to execve
exec(command, envp);
}
// Appends `command` to the command log.
//
// Logging is enabled by the ENV_command_log variable in the command's own
// environment, whose value is the path of the log file. The command is
// wrapped in an interceptor::Message, stripped of its environment variables
// and appended to the file as a delimited protobuf message. Nothing happens
// if the variable is not set or the file cannot be opened.
static void log(const interceptor::Command& command) {
  const auto& env = command.env_vars();
  const auto entry = env.find(ENV_command_log);
  if (entry == env.cend()) {
    return;
  }

  const std::string log_path(entry->second);
  std::ofstream file(log_path, std::ofstream::out | std::ofstream::app | std::ofstream::binary);
  if (!file.is_open()) {
    return;
  }

  interceptor::Message message;
  *message.mutable_command() = command;
  // the environment is deliberately left out of the log
  message.mutable_command()->clear_env_vars();
  google::protobuf::util::SerializeDelimitedToOstream(message, &file);
}
// Executes `command` through the original execve.
//
//   command: provides the program path and the argument list.
//   envp:    environment for the new program, passed on unchanged.
//
// On success the call does not return. If it does return, the exec failed and
// the caller is expected to fall back to its own handling.
static void exec(const interceptor::Command& command, char* const envp[]) {
  // Build the null-terminated argv array expected by execve. The pointers
  // refer to the strings owned by `command`, which outlives the call.
  std::vector<const char*> c_args;
  c_args.reserve(command.args().size() + 1);
  for (const auto& arg : command.args()) {
    c_args.push_back(arg.c_str());
  }
  // The terminator has to be appended as a real element: reserve() only
  // allocates capacity, so writing to an index beyond size() is out of bounds.
  c_args.push_back(nullptr);

  // TODO: at this point, we could free some memory that is held in Command.
  //       While the args vector is reused for args, we could free the EnvMap
  //       and the original args.

  // does not return on success
  old_execve(command.program().c_str(), const_cast<char**>(c_args.data()), envp);
}