blob: ca2c300c69356198a2e3ecb6390d9cf8f16c55a9 [file] [log] [blame]
#!/usr/bin/python
#
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate LIR files out of the definition file.
* Operand usage
Register allocator needs operand usage to learn which operands can share the
same register.
To understand register sharing options, register allocator assumes insn works
in these steps:
- read input operands
- do the job
- write output operands
So, input-output operands should have dedicated registers, while input-only
operands can share registers with output-only operands.
There might be an exception when an output-only operand is written before all
input-only operands are read, so its register can't be shared. Such operands
are usually referred to as output-only-early-clobber operands.
For register sharing, output-only-early-clobber operand is the same as
input-output operand, but it is unnatural to describe output-only as
input-output, so we use a special keyword for it.
Finally, keywords are:
use - input-only
def - output-only
def_early_clobber - output-only-early-clobber
use_def - input-output
* Scratch operands
Scratch operands are actually output operands - indeed, their original value
is not used and they get some new value after the insn is done. However, they
are usually written before all input operands are read, so it makes sense to
describe scratch operands as output-only-early-clobber.
"""
import asm_defs
import json
import sys
def _is_reg(arg_type):
  """Tell whether arg_type is one of the register operand classes.

  The asm_defs predicates is_greg, is_xreg and is_implicit_reg are tried in
  that order; the first truthy answer is returned, otherwise the answer of the
  last predicate.
  """
  verdict = asm_defs.is_greg(arg_type)
  if not verdict:
    verdict = asm_defs.is_xreg(arg_type)
  if not verdict:
    verdict = asm_defs.is_implicit_reg(arg_type)
  return verdict
class Operand:
  """Plain attribute bag describing one operand of a machine instruction.

  The _make_*_operand factories in this file fill in these attributes:
  type, name, reg_operand_info, initializer and asm_arg.
  """
def _get_reg_operand_info(usage, kind):
  """Return the C++ initializer text describing one register operand.

  Args:
    usage: operand usage keyword, one of 'use', 'def', 'use_def' or
      'def_early_clobber'.
    kind: register class name; it is emitted as the C++ identifier 'k<kind>'.

  Returns:
    A string of the form '{ &k<kind>, MachineRegKind::k<Usage> }'.

  Raises:
    AssertionError: if usage is not one of the known keywords.
  """
  usage_to_enum = {
      'use': 'kUse',
      'def': 'kDef',
      'use_def': 'kUseDef',
      'def_early_clobber': 'kDefEarlyClobber',
  }
  if usage not in usage_to_enum:
    # Raised explicitly instead of 'assert False' so that the check is not
    # compiled out by 'python -O' (which would silently return None here).
    raise AssertionError('unknown operand usage %s' % (usage))
  return '{ &k%s, MachineRegKind::%s }' % (kind, usage_to_enum[usage])
def _make_reg_operand(r, usage, kind):
  """Create the Operand for the register stored in slot number r.

  Args:
    r: index of the register slot; used for the parameter name 'r<r>' and for
      the SetRegAt/RegAt calls in the generated code.
    usage: usage keyword forwarded to _get_reg_operand_info.
    kind: register class of the operand.

  Returns:
    An Operand of type 'MachineReg'. Its asm_arg is None for implicit
    registers, so they are not passed to the assembler call.
  """
  reg = Operand()
  reg.type = 'MachineReg'
  reg.name = 'r%d' % (r)
  reg.reg_operand_info = _get_reg_operand_info(usage, kind)
  reg.initializer = 'SetRegAt(%d, r%d)' % (r, r)
  # How the register is handed to the assembler depends on its class.
  if asm_defs.is_greg(kind):
    reg.asm_arg = 'GetGReg(RegAt(%d))' % (r)
  elif asm_defs.is_xreg(kind):
    reg.asm_arg = 'GetXReg(RegAt(%d))' % (r)
  elif asm_defs.is_implicit_reg(kind):
    reg.asm_arg = None
  else:
    assert False, 'unknown register kind %s' % (kind)
  return reg
def _make_imm_operand(bits):
  """Create the Operand for an immediate of the given bit width.

  The C++ type is 'int<bits>_t'; the value is stored through set_imm() and
  cast back to that type when passed to the assembler.
  """
  c_type = 'int%s_t' % (bits)
  imm = Operand()
  for attr, value in (('type', c_type),
                      ('name', 'imm'),
                      ('reg_operand_info', None),
                      ('initializer', 'set_imm(imm)'),
                      ('asm_arg', 'static_cast<%s>(imm())' % (c_type))):
    setattr(imm, attr, value)
  return imm
def _make_scale_operand():
  """Create the Operand for the scale factor of an indexed memory operand."""
  scale = Operand()
  for attr, value in (('type', 'MachineMemOperandScale'),
                      ('name', 'scale'),
                      ('reg_operand_info', None),
                      ('initializer', 'set_scale(scale)'),
                      ('asm_arg', 'ToScaleFactor(scale())')):
    setattr(scale, attr, value)
  return scale
def _make_disp_operand():
  """Create the Operand for a uint32_t displacement."""
  disp = Operand()
  for attr, value in (('type', 'uint32_t'),
                      ('name', 'disp'),
                      ('reg_operand_info', None),
                      ('initializer', 'set_disp(disp)'),
                      ('asm_arg', 'disp()')):
    setattr(disp, attr, value)
  return disp
def _make_cond_operand():
  """Create the Operand for an Assembler::Condition argument."""
  cond = Operand()
  for attr, value in (('type', 'Assembler::Condition'),
                      ('name', 'cond'),
                      ('reg_operand_info', None),
                      ('initializer', 'set_cond(cond)'),
                      ('asm_arg', 'cond()')):
    setattr(cond, attr, value)
  return cond
def _make_label_operand():
  """Create the Operand for a Label* argument.

  The label pointer is kept in the immediate field (set_imm/imm), which is
  possible because an insn never has both an immediate and a label.
  """
  label = Operand()
  for attr, value in (('type', 'Label*'),
                      ('name', 'label'),
                      ('reg_operand_info', None),
                      ('initializer', 'set_imm(reinterpret_cast<uintptr_t>(label))'),
                      ('asm_arg', '*reinterpret_cast<Label*>(imm())')):
    setattr(label, attr, value)
  return label
def _check_insn_defs(insn, skip_unsupported=False):
  """Validate the operand list of one instruction definition.

  Checks that the insn has at most one immediate-or-label, at most one memory
  operand and at most one displacement, and that every operand class is known.

  Args:
    insn: instruction definition with 'args' and, for memory insns,
      'addr_mode'.
    skip_unsupported: when True, a memory operand with an unsupported
      addressing mode makes the function return False instead of asserting.

  Returns:
    True when the definition passed all checks, False only in the
    skip_unsupported case described above.

  Raises:
    AssertionError: on any violated constraint.
  """
  known_addr_modes = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')
  # Immediate and label share one field in the insn, hence one flag for both.
  imm_taken = False
  memop_taken = False
  disp_taken = False
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      continue
    if asm_defs.is_imm(kind):
      assert not imm_taken
      imm_taken = True
    elif asm_defs.is_mem_op(kind):
      # An insn may carry a single memory operand only.
      assert not memop_taken
      addr_mode = insn.get('addr_mode')
      supported = addr_mode in known_addr_modes
      if skip_unsupported and not supported:
        return False
      assert supported, 'unknown addressing mode %s' % (addr_mode)
      memop_taken = True
    elif asm_defs.is_disp(kind):
      assert not disp_taken
      disp_taken = True
    elif asm_defs.is_cond(kind):
      continue
    elif asm_defs.is_label(kind):
      assert not imm_taken
      imm_taken = True
    else:
      assert False, 'unknown operand class %s' % (kind)
  return True
def _get_insn_operands(insn):
  """Translate the 'args' of an insn definition into Operand objects.

  Every returned Operand carries type, name, reg_operand_info, initializer and
  asm_arg. A memory operand expands into base and/or index registers, a scale
  (only with an index register) and a displacement, depending on the insn's
  'addr_mode'.

  Returns:
    A pair (operands, side_effects). side_effects is True for the insns listed
    below and for any insn whose memory operand usage is not 'use'.
  """
  operands = []
  next_reg = 0
  # Int3, Lfence, Mfence, Sfence, and UD2 have side effects not related to arguments.
  has_side_effects = insn['name'] in ('Int3', 'Lfence', 'Mfence', 'Sfence', 'UD2')
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      operands.append(_make_reg_operand(next_reg, arg.get('usage'), kind))
      next_reg += 1
    elif asm_defs.is_imm(kind):
      # Immediate and label share one field in the insn.
      # Everything after the first three characters of the class name is
      # used as the bit width (presumably the class looks like 'Imm<bits>').
      operands.append(_make_imm_operand(kind[3:]))
    elif asm_defs.is_mem_op(kind):
      # Memory that is not merely read means the insn has side effects.
      if arg['usage'] != 'use':
        has_side_effects = True
      # An insn may carry a single memory operand only.
      addr_mode = insn.get('addr_mode')
      assert addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp'), \
          'unknown addressing mode %s' % (addr_mode)
      needs_base = addr_mode in ('BaseDisp', 'BaseIndexDisp')
      needs_index = addr_mode in ('IndexDisp', 'BaseIndexDisp')
      if needs_base:
        operands.append(_make_reg_operand(next_reg, 'use', 'GeneralReg32'))
        next_reg += 1
      if needs_index:
        operands.append(_make_reg_operand(next_reg, 'use', 'GeneralReg32'))
        next_reg += 1
        operands.append(_make_scale_operand())
      operands.append(_make_disp_operand())
    elif asm_defs.is_disp(kind):
      operands.append(_make_disp_operand())
    elif asm_defs.is_cond(kind):
      operands.append(_make_cond_operand())
    elif asm_defs.is_label(kind):
      operands.append(_make_label_operand())
    else:
      assert False, 'unknown operand class %s' % (kind)
  return operands, has_side_effects
def _get_insn_debug_operands(insn):
  """Return C++ expressions that print each operand of insn, in order.

  Each list element is a call to one of the Get*OperandDebugString helpers;
  register-based helpers receive the index of the first register slot they
  describe.
  """
  reg_helper = 'GetRegOperandDebugString'
  regs_per_mode = {'BaseDisp': 1, 'IndexDisp': 1, 'BaseIndexDisp': 2}
  exprs = []
  reg_index = 0
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      if asm_defs.is_greg(kind) or asm_defs.is_xreg(kind):
        exprs.append('GetRegOperandDebugString(this, %d)' % (reg_index))
      elif asm_defs.is_implicit_reg(kind):
        exprs.append('GetImplicitRegOperandDebugString(this, %d)' % (reg_index))
      else:
        assert False, 'unknown register kind %s' % (kind)
      reg_index += 1
    elif asm_defs.is_imm(kind):
      # Immediate and label share one field in the insn.
      exprs.append('GetImmOperandDebugString(this)')
    elif asm_defs.is_mem_op(kind):
      # An insn may carry a single memory operand only.
      addr_mode = insn.get('addr_mode')
      if addr_mode == 'Absolute':
        exprs.append('GetAbsoluteMemOperandDebugString(this)')
      elif addr_mode in ('BaseDisp', 'IndexDisp', 'BaseIndexDisp'):
        exprs.append('Get%sMemOperandDebugString(this, %d)' % (addr_mode, reg_index))
        # Skip the register slots consumed by this memory operand.
        reg_index += regs_per_mode[addr_mode]
      else:
        assert False, 'unknown addr_mode %s' % (addr_mode)
    elif asm_defs.is_disp(kind):
      # Hack: a displacement turns the preceding register into a base+disp
      # memory operand, so swap the helper name and keep its argument list.
      assert exprs
      assert exprs[-1].startswith(reg_helper)
      exprs[-1] = 'GetBaseDispMemOperandDebugString' + exprs[-1][len(reg_helper):]
    elif asm_defs.is_cond(kind):
      exprs.append('GetCondOperandDebugString(this)')
    elif asm_defs.is_label(kind):
      exprs.append('GetLabelOperandDebugString(this)')
    else:
      assert False, 'unknown operand class %s' % (kind)
  return exprs
# Prefix put in front of statements emitted inside generated C++ function
# bodies (used by the constructor and Emit generators below).
INDENT = ' '
def _gen_insn_ctor(f, insn):
  """Write the out-of-class kInfo definition and the constructor of insn to f.

  The constructor takes one parameter per operand and its body runs every
  operand's initializer.
  """
  cls = insn.get('name')
  operands, _ = _get_insn_operands(insn)
  signature = ', '.join('%s %s' % (op.type, op.name) for op in operands)
  # Joined up front: with no operands this still yields one empty body line.
  body = '\n'.join('%s%s;' % (INDENT, op.initializer) for op in operands)
  for line in ('constexpr MachineInsnInfo %s::kInfo;' % (cls),
               '%s::%s(%s) : MachineInsnForArch(&kInfo) {' % (cls, cls, signature),
               body,
               '}'):
    print(line, file=f)
# TODO(b/232598137): Maybe we should just implement generic printing in C++
# instead of generating it for every instruction.
def _gen_insn_debug(f, insn):
  """Write the C++ GetDebugString() definition of insn to f.

  Without operands the generated method returns the bare mnemonic. Otherwise
  it builds 'mnemo op1, op2, ...' and appends the recovery pc when one is set.
  """
  def emit(line):
    print(line, file=f)

  name = insn.get('name')
  mnemo = insn.get('mnemo')
  emit('std::string %s::GetDebugString() const {' % (name))
  operands = _get_insn_debug_operands(insn)
  if operands:
    emit(' std::string s("%s ");' % (mnemo))
    for position, operand in enumerate(operands):
      if position:
        # Separator goes before every operand except the first one.
        emit(' s += ", ";')
      emit(' s += %s;' % (operand))
    # recovery_bb() is not printed: it can be found by edges outgoing from
    # the basic block.
    emit(' if (recovery_pc()) {')
    emit(' s += StringPrintf(" <0x%" PRIxPTR ">", recovery_pc());')
    emit(' }')
    emit(' return s;')
  else:
    emit(' return "%s";' % (mnemo))
  emit('}')
def _gen_insn_emit(f, insn):
  """Write the C++ Emit() definition of insn to f.

  The generated method calls the assembler function named by insn's 'asm'
  entry, passing the asm_arg of every operand that has one.
  """
  cls = insn.get('name')
  asm_func = insn.get('asm')
  operands, _ = _get_insn_operands(insn)
  # Operands with no asm_arg (e.g. implicit registers) are not passed along.
  call_args = ', '.join(op.asm_arg for op in operands if op.asm_arg)
  for line in ('void %s::Emit(CodeEmitter* as) const {' % (cls),
               '%sas->%s(%s);' % (INDENT, asm_func, call_args),
               '}'):
    print(line, file=f)
def _gen_insn_class(f, insn):
  """Write the C++ class declaration of insn to f.

  The class derives from MachineInsnForArch and declares a constructor with
  one parameter per operand, a constexpr kInfo (opcode, register operand
  count, register operand infos, insn kind) and the GetDebugString/Emit
  overrides.
  """
  name = insn.get('name')
  operands, side_effects = _get_insn_operands(insn)
  reg_infos = [op.reg_operand_info for op in operands if op.reg_operand_info]
  insn_kind = 'kMachineInsnSideEffects' if side_effects else 'kMachineInsnDefault'
  ctor_params = ', '.join('%s %s' % (op.type, op.name) for op in operands)
  declaration = (
      'class %s : public MachineInsnForArch {' % (name),
      ' public:',
      ' explicit %s(%s);' % (name, ctor_params),
      ' static constexpr MachineInsnInfo kInfo =',
      ' MachineInsnInfo({kMachineOp%s,' % (name),
      ' %d,' % (len(reg_infos)),
      ' {%s},' % (', '.join(reg_infos)),
      ' %s});' % (insn_kind),
      ' static constexpr int NumRegOperands() { return kInfo.num_reg_operands; }',
      ' static constexpr const MachineRegKind& RegKindAt(int i) { return kInfo.reg_kinds[i]; }',
      ' std::string GetDebugString() const override;',
      ' void Emit(CodeEmitter* as) const override;',
      '};',
  )
  for line in declaration:
    print(line, file=f)
def gen_code_2_cc(out, arch, insns):
  """Write the constructor definitions of all insns to the file named out.

  The arch argument is accepted but not used here.
  """
  with open(out, 'w') as cc_file:
    for definition in insns:
      _gen_insn_ctor(cc_file, definition)
def gen_code_debug_cc(out, arch, insns):
  """Write the GetDebugString() definitions of all insns to the file named out.

  The definitions are wrapped in the berberis::<arch> namespaces and preceded
  by the generated-file banner and the required includes.
  """
  prologue = """\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!
#include "berberis/base/stringprintf.h"
#include "berberis/backend/%s/code_debug.h"
namespace berberis {
namespace %s {
"""
  epilogue = """\
} // namespace %s
} // namespace berberis"""
  with open(out, 'w') as cc_file:
    print(prologue % (arch, arch), file=cc_file)
    for definition in insns:
      _gen_insn_debug(cc_file, definition)
    print(epilogue % (arch), file=cc_file)
def gen_code_emit_cc(out, arch, insns):
  """Write the Emit() definitions of all insns to the file named out.

  The definitions are wrapped in the berberis::<arch> namespaces and preceded
  by the generated-file banner and the required includes.
  """
  prologue = """\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!
#include "berberis/backend/code_emitter.h"
#include "berberis/backend/%s/code_emit.h"
namespace berberis {
namespace %s {
"""
  epilogue = """\
} // namespace %s
} // namespace berberis"""
  with open(out, 'w') as cc_file:
    print(prologue % (arch, arch), file=cc_file)
    for definition in insns:
      _gen_insn_emit(cc_file, definition)
    print(epilogue % (arch), file=cc_file)
def gen_machine_info_h(out, arch, insns):
  """Write one C++ 'using <name> = <name>;' line per insn to the file named out.

  The arch argument is accepted but not used here.
  """
  with open(out, 'w') as header:
    for definition in insns:
      insn_name = definition.get('name')
      header.write('using %s = %s;' % (insn_name, insn_name) + '\n')
def gen_machine_opcode_h(out, arch, insns):
  """Write one 'kMachineOp<name>,' enumerator line per insn to the file named out.

  The arch argument is accepted but not used here.
  """
  with open(out, 'w') as header:
    for definition in insns:
      header.write('kMachineOp%s,' % (definition.get('name')) + '\n')
def _gen_mem_insn_groups(f, insns):
  """Write one 'using <group> = MemInsns<...>;' line per memory insn group to f.

  Insns are grouped by their 'mem_group_name'; insns without one are ignored.
  Groups are written in sorted name order.

  Raises:
    KeyError: if a group lacks an insn for one of the four addressing modes.
  """
  # The order of the addressing modes is important: it must match what
  # MemInsns expects.
  mode_order = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')
  # group name -> {addressing mode -> insn name}
  by_group = {}
  for insn in insns:
    group = insn.get('mem_group_name')
    if not group:
      continue
    by_group.setdefault(group, {})[insn.get('addr_mode')] = insn.get('name')
  for group in sorted(by_group):
    members = by_group[group]
    template_args = ', '.join([members[mode] for mode in mode_order])
    print('using %s = MemInsns<%s>;' % (group, template_args), file=f)
def gen_machine_ir_h(out, arch, insns):
  """Write the C++ class declarations of all insns to the file named out.

  The classes are followed by an empty line and the memory insn group
  aliases. The arch argument is accepted but not used here.
  """
  with open(out, 'w') as header:
    for definition in insns:
      _gen_insn_class(header, definition)
    # Empty line between the classes and the group aliases.
    print('', file=header)
    _gen_mem_insn_groups(header, insns)
def _contains_mem(insn):
  """Return True if at least one argument of insn is a memory operand."""
  for arg in insn.get('args'):
    if asm_defs.is_mem_op(arg['class']):
      return True
  return False
def _create_mem_insn(insn, addr_mode):
  """Return a shallow copy of insn specialized for one addressing mode.

  The copy's 'name' and 'asm' are set to the macro name asm_defs derives for
  (insn, addr_mode), 'addr_mode' is recorded, and 'mem_group_name' is the
  macro name for an empty addressing mode with 'Insns' appended. The input
  insn is left untouched.
  """
  specialized = insn.copy()
  macro_name = asm_defs.get_mem_macro_name(insn, addr_mode)
  specialized.update({'name': macro_name, 'addr_mode': addr_mode, 'asm': macro_name})
  group_name = asm_defs.get_mem_macro_name(insn, '') + 'Insns'
  specialized['mem_group_name'] = group_name
  return specialized
def _expand_mem_insns(insns):
  """Return insns with per-addressing-mode variants added for memory insns.

  For every insn that has a memory operand, four variants (Absolute, BaseDisp,
  IndexDisp, BaseIndexDisp) are inserted right before the insn itself. All
  original insns are kept, in their original order.
  """
  addr_modes = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')
  expanded = []
  for insn in insns:
    if _contains_mem(insn):
      for addr_mode in addr_modes:
        expanded.append(_create_mem_insn(insn, addr_mode))
    expanded.append(insn)
  return expanded
def _load_lir_def(allowlist_looked, allowlist_found, asm_def):
  """Load one definition file and keep only its allowlisted instructions.

  Args:
    allowlist_looked: set of instruction (or memory group) names to keep.
    allowlist_found: set updated in place with every allowlisted name that
      was actually seen in this file.
    asm_def: path handed to asm_defs.load_asm_defs.

  Returns:
    A pair (arch, insns) where insns excludes everything marked 'skip_lir',
    whether by this function or already in the loaded definition.
  """
  arch, loaded = asm_defs.load_asm_defs(asm_def)
  candidates = _expand_mem_insns(loaded)
  # Memory insn variants are allowlisted through their group name, everything
  # else through its own name. Whatever is not allowlisted gets marked.
  for insn in candidates:
    lookup_name = insn.get('mem_group_name', insn['name'])
    if lookup_name not in allowlist_looked:
      insn['skip_lir'] = 1
      continue
    allowlist_found.add(lookup_name)
  # Drop the disabled instructions.
  kept = []
  for insn in candidates:
    if insn.get('skip_lir'):
      continue
    kept.append(insn)
  return arch, kept
def _allowlist_instructions(allowlist_files, machine_ir_intrinsic_binding_files):
  """Collect the names of all instructions that must be generated.

  Args:
    allowlist_files: paths of JSON files holding an object whose 'insns'
      entry lists instruction names.
    machine_ir_intrinsic_binding_files: paths of JSON files holding an array
      of binding objects, optionally preceded by license strings. The 'insn'
      of every binding whose 'usage' is not 'interpret-only' is allowlisted.

  Returns:
    The set of allowlisted instruction names.
  """
  allowlisted_names = set()
  for allowlist_file in allowlist_files:
    with open(allowlist_file) as allowlist_json:
      allowlisted_names.update(json.load(allowlist_json)['insns'])
  for binding_file in machine_ir_intrinsic_binding_files:
    with open(binding_file) as binding_json:
      json_array = json.load(binding_json)
    # Leading entries of type str are actually part of the file license.
    # Skip them with a bounds check, so that a file that is empty or holds
    # nothing but license text contributes no names instead of raising
    # IndexError.
    first_binding = 0
    while (first_binding < len(json_array) and
           isinstance(json_array[first_binding], str)):
      first_binding += 1
    for binding in json_array[first_binding:]:
      if binding.get('usage', '') != 'interpret-only':
        allowlisted_names.add(binding['insn'])
  return allowlisted_names
def load_all_lir_defs(allowlist_files, machine_ir_intrinsic_binding_files, lir_defs):
  """Load all LIR definition files and return the allowlisted instructions.

  Args:
    allowlist_files: JSON allowlist files, see _allowlist_instructions.
    machine_ir_intrinsic_binding_files: JSON binding files, see
      _allowlist_instructions.
    lir_defs: paths of the definition files to load, in order.

  Returns:
    A pair (arch, insns). Instructions from files that declare an
    architecture come first; macro instructions (files whose architecture is
    None) follow, limited to those with supported operands.

  Raises:
    AssertionError: if the files disagree on the architecture, if an
      instruction definition is malformed, or if an allowlisted instruction
      is not defined in any of the files.
  """
  allowlist_looked = _allowlist_instructions(
      allowlist_files, machine_ir_intrinsic_binding_files)
  allowlist_found = set()
  arch = None
  insns = []
  macro_insns = []
  for lir_def in lir_defs:
    def_arch, def_insns = _load_lir_def(allowlist_looked, allowlist_found, lir_def)
    if arch and not arch.startswith('common_'):
      # A concrete architecture was already seen; later files must either
      # match it or carry no architecture at all.
      assert def_arch is None or arch == def_arch, \
          'architecture %s of %s conflicts with %s' % (def_arch, lir_def, arch)
    else:
      arch = def_arch
    if def_arch is None:
      macro_insns.extend(def_insns)
    else:
      insns.extend(def_insns)
  for insn in insns:
    _check_insn_defs(insn)
  # Some macroinstructions can only be used in Lite translator for now. Ignore them here.
  insns.extend(insn for insn in macro_insns if _check_insn_defs(insn, True))
  # Every allowlisted name must have been defined somewhere; name the missing
  # ones so that a failure can be acted upon.
  assert allowlist_looked == allowlist_found, \
      'allowlisted instructions not found in any definition file: %s' % (
          ', '.join(sorted(map(str, allowlist_looked - allowlist_found))))
  return arch, insns