| #!/usr/bin/env python3 |
| # Copyright 2020 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Helper tool to generate cross-compiled syscall and constant tables to JSON. |
| |
| This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and |
| generates the `constants.json` file with that. LLVM IR files are moderately |
| architecture-neutral (at least for this case). |
| """ |
| |
| import argparse |
| import collections |
| import json |
| import re |
| import sys |
| |
| |
# Matches the definition of a NUL-terminated string global. Group 1 is the
# global's name (e.g. @.str.5) and group 2 is the string contents without the
# trailing \00. Strings containing a backslash escape or a double quote do not
# match.
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Matches one constant_entry / syscall_entry struct initializer. Group 1 is
# the text between the braces.
_TABLE_ENTRY_RE = re.compile(
    r"%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}"
)
# The contents of a table entry look something like
#
#   i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# for arm-v7a. What we are interested in are the @.str.x symbol (group 1,
# which may also be the literal `null`) and the very last number (group 2).
_TABLE_ENTRY_CONTENTS = re.compile(r".*?(null|@[a-zA-Z0-9.]+).* (-?\d+)")

# pylint: disable=line-too-long
# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, the current clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# where FS_KEY_DESC_PREFIX is defined as the char* "fscrypt:".
# The current output for that constant in constants.json is:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants. It is only
# consulted for entries that _TABLE_ENTRY_CONTENTS fails to match.
# pylint: enable=line-too-long
_IGNORED_ENTRY_CONTENTS = re.compile(r".*? ptrto.* \(.*\)")

# Result of parse_llvm_ir(): the name of the table that was found and a
# mapping from entry name to integer value.
ParseResults = collections.namedtuple(
    "ParseResults", ["table_name", "table_entries"]
)

# Passed to the argument parser in main() as the epilog of the help output.
HELP_EPILOG = """\
Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""
| |
| |
def parse_llvm_ir(ir):
    """Extracts the syscall or constant table from a single LLVM IR file.

    Args:
        ir: An iterable yielding the lines of the LLVM IR text, such as an
            open text file.

    Returns:
        A ParseResults tuple. `table_name` is "syscalls" or "constants"
        depending on which table was seen last, or "" if no table line was
        found. `table_entries` is an OrderedDict mapping each entry's name
        to its integer value.

    Raises:
        ValueError: If a table entry can neither be parsed nor matched
            against the pattern of entries that are ignored on purpose.
        KeyError: If a table entry refers to a string constant that has not
            been defined on an earlier line.
    """
    names_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    table = ""
    for line in ir:
        definition = _STRING_CONSTANT_RE.match(line)
        if definition is not None:
            # Remember the string so table entries can refer to it later.
            symbol, text = definition.groups()
            names_by_symbol[symbol] = text
            continue

        if "@syscall_table" in line:
            table = "syscalls"
        elif "@constant_table" in line:
            table = "constants"
        else:
            # Neither a string constant nor a table: nothing to extract.
            continue

        for entry in _TABLE_ENTRY_RE.findall(line):
            parsed = _TABLE_ENTRY_CONTENTS.match(entry)
            if parsed is None:
                if _IGNORED_ENTRY_CONTENTS.match(entry) is None:
                    raise ValueError("Failed to parse table entry %r" % entry)
                # Pointer-valued entry: deliberately left out of the table.
                continue
            symbol, number = parsed.groups()
            if symbol == "null":
                # This is the end-of-table marker.
                break
            entries[names_by_symbol[symbol]] = int(number)

    return ParseResults(table_name=table, table_entries=entries)
| |
| |
# Known MINIJAIL_ARCH_NR values and the architecture name reported for each.
# The constants table can only carry numeric values, so the name has to come
# from this hardcoded mapping.
_ARCH_NAMES = {
    0xC000003E: "x86_64",
    0x40000003: "x86",
    0xC00000B7: "arm64",
    0x40000028: "arm",
    0xC00000F3: "riscv64",
}


def main(argv=None):
    """Main entrypoint: parses the LLVM IR files and writes constants.json.

    Args:
        argv: The command-line arguments, without the program name. Defaults
            to sys.argv[1:].

    Returns:
        0 on success.

    Raises:
        KeyError: If none of the inputs provided a constants table, or the
            table lacks MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS.
        ValueError: If MINIJAIL_ARCH_NR is not a known architecture, or if
            parse_llvm_ir() fails to parse a table entry.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument(
        "--output",
        help="The path of the generated constants.json file.",
        type=argparse.FileType("w"),
        required=True,
    )
    parser.add_argument(
        "llvm_ir_files",
        help="An LLVM IR file with one of the {constants,syscall} table.",
        metavar="llvm_ir_file",
        nargs="+",
        type=argparse.FileType("r"),
    )
    opts = parser.parse_args(argv)

    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[
                parse_results.table_name
            ] = parse_results.table_entries

        # Populate the top-level fields.
        constants = constants_json["constants"]
        arch_nr = constants["MINIJAIL_ARCH_NR"]
        constants_json["arch_nr"] = arch_nr
        constants_json["bits"] = constants["MINIJAIL_ARCH_BITS"]

        arch_name = _ARCH_NAMES.get(arch_nr)
        if arch_name is None:
            raise ValueError("Unknown architecture: 0x%08X" % arch_nr)
        constants_json["arch_name"] = arch_name

        json.dump(constants_json, opts.output, indent=" ")
    finally:
        # argparse.FileType opens the files but never closes them. Close them
        # here so the output is flushed to disk by the time main() returns and
        # no handles leak when main() is called from other code. The standard
        # streams (selected with "-") belong to the caller and stay open.
        for handle in (*opts.llvm_ir_files, opts.output):
            if handle is sys.stdin or handle is sys.stdout:
                continue
            handle.close()
    return 0
| |
| |
if __name__ == "__main__":
    # main() falls back to sys.argv[1:] when it is called without arguments.
    sys.exit(main())