blob: 02847bdfd6751831ebfd8cbbdd3091e7a81d383a [file] [log] [blame] [edit]
#!/usr/bin/env python3
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Helper tool to generate cross-compiled syscall and constant tables to JSON.
This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
generates the `constants.json` file with that. LLVM IR files are moderately
architecture-neutral (at least for this case).
"""
import argparse
import collections
import json
import re
import sys
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
_TABLE_ENTRY_RE = re.compile(
r"%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}"
)
# Extracts the name symbol and the numeric value from the contents of a table
# entry. On arm-v7a, the contents look something like
#
# i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# What we are interested in are the @.str.x and the very last number. The
# first group may also be `null` instead of an @.str.x symbol.
_TABLE_ENTRY_CONTENTS = re.compile(r".*?(null|@[a-zA-Z0-9.]+).* (-?\d+)")
# pylint: disable=line-too-long
# When testing clang-r458909, we found a new constant_entry pattern:
# %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, the current clang-r458507 generates:
# %struct.constant_entry { i8* getelementptr inbounds
# ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
# i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
# { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# where FS_KEY_DESC_PREFIX is defined as the char* "fscrypt:".
# The current output for that constant in constants.json is:
# "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern that recognizes such pointer constants, so that
# they can be ignored:
# pylint: enable=line-too-long
_IGNORED_ENTRY_CONTENTS = re.compile(r".*? ptrto.* \(.*\)")
# Result of parsing one LLVM IR file: the name of the table that was found and
# the entries it contains.
ParseResults = collections.namedtuple(
    "ParseResults", "table_name table_entries"
)
# Text passed as the epilog of the command-line help.
HELP_EPILOG = (
    "Generate LLVM IR: "
    "clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c\n"
)
def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    Args:
        ir: An iterable yielding the lines of the LLVM IR text, such as an
            open text file.

    Returns:
        A ParseResults. Its table_name is "syscalls" or "constants" (or the
        empty string when no table line was seen), and its table_entries maps
        each entry's name to its integer value, in the order the entries were
        found.

    Raises:
        ValueError: A table entry matches neither the expected entry pattern
            nor the ignored one.
        KeyError: A table entry refers to a string constant that has not been
            seen on an earlier line.
    """
    strings_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    kind = ""
    for text in ir:
        definition = _STRING_CONSTANT_RE.match(text)
        if definition is not None:
            # Remember the string so later table entries can resolve it.
            symbol = definition.group(1)
            strings_by_symbol[symbol] = definition.group(2)
            continue

        is_syscall_table = "@syscall_table" in text
        if not is_syscall_table and "@constant_table" not in text:
            continue
        kind = "syscalls" if is_syscall_table else "constants"

        for raw_entry in _TABLE_ENTRY_RE.findall(text):
            parsed = _TABLE_ENTRY_CONTENTS.match(raw_entry)
            if parsed is None:
                if _IGNORED_ENTRY_CONTENTS.match(raw_entry) is None:
                    raise ValueError(
                        "Failed to parse table entry %r" % raw_entry
                    )
                # Known-unparseable entry: skip it.
                continue
            symbol, number = parsed.groups()
            if symbol == "null":
                # This is the end-of-table marker.
                break
            entries[strings_by_symbol[symbol]] = int(number)
    return ParseResults(table_name=kind, table_entries=entries)
def main(argv=None):
    """Main entrypoint.

    Parses every LLVM IR file given on the command line, stores each parsed
    table under its table name, adds the top-level `arch_nr`, `bits` and
    `arch_name` fields, and writes the result as JSON to the `--output` file.

    Args:
        argv: The command-line arguments, without the program name. Defaults
            to `sys.argv[1:]`.

    Returns:
        0 on success.

    Raises:
        KeyError: No "constants" table was parsed, or it does not contain
            MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS.
        ValueError: The architecture number has no known architecture name.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument(
        "--output",
        help="The path of the generated constants.json file.",
        type=argparse.FileType("w"),
        required=True,
    )
    parser.add_argument(
        "llvm_ir_files",
        help="An LLVM IR file with one of the {constants,syscall} table.",
        metavar="llvm_ir_file",
        nargs="+",
        type=argparse.FileType("r"),
    )
    opts = parser.parse_args(argv)

    # It is a bit more complicated to generate the arch_name, since the
    # constants can only output numeric values. Use a hardcoded mapping
    # instead.
    arch_names = {
        0xC000003E: "x86_64",
        0x40000003: "x86",
        0xC00000B7: "arm64",
        0x40000028: "arm",
        0xC00000F3: "riscv64",
    }

    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[
                parse_results.table_name
            ] = parse_results.table_entries

        # Populate the top-level fields.
        constants = constants_json["constants"]
        constants_json["arch_nr"] = constants["MINIJAIL_ARCH_NR"]
        constants_json["bits"] = constants["MINIJAIL_ARCH_BITS"]

        arch_nr = constants_json["arch_nr"]
        if arch_nr not in arch_names:
            raise ValueError("Unknown architecture: 0x%08X" % arch_nr)
        constants_json["arch_name"] = arch_names[arch_nr]

        json.dump(constants_json, opts.output, indent=" ")
    finally:
        # argparse.FileType opens the files but never closes them. Close them
        # here so the output is flushed deterministically, but leave the
        # standard streams (used for "-") alone.
        for stream in opts.llvm_ir_files + [opts.output]:
            if stream is not sys.stdin and stream is not sys.stdout:
                stream.close()
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv[1:]))