| #!/usr/bin/env python3 |
| """ The Python Hipify script. |
| ## |
| # Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. |
| # 2017-2018 Advanced Micro Devices, Inc. and |
| # Facebook Inc. All rights reserved. |
| # |
| # Permission is hereby granted, free of charge, to any person obtaining a copy |
| # of this software and associated documentation files (the "Software"), to deal |
| # in the Software without restriction, including without limitation the rights |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| # copies of the Software, and to permit persons to whom the Software is |
| # furnished to do so, subject to the following conditions: |
| # |
| # The above copyright notice and this permission notice shall be included in |
| # all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| # THE SOFTWARE. |
| """ |
import argparse
import fnmatch
import os
import re
import shutil
import sys
from pathlib import Path
| |
| from . import constants |
| from .cuda_to_hip_mappings import CUDA_TO_HIP_MAPPINGS |
| from .cuda_to_hip_mappings import MATH_TRANSPILATIONS |
| |
| from typing import Dict, List, Iterator, Optional |
| from collections.abc import Mapping, Iterable |
| HipifyResult = Dict[str, Optional[str]] |
| HipifyFinalResult = Dict[str, HipifyResult] |
| HIPIFY_C_BREADCRUMB = "// !!! This is a file automatically generated by hipify!!!\n" |
| HIPIFY_FINAL_RESULT: HipifyFinalResult = {} |
| |
# Hardcode the PyTorch template map: maps PyTorch kernel template type
# names to their actual types.
PYTORCH_TEMPLATE_MAP = {"Dtype": "scalar_t", "T": "scalar_t"}
| |
__all__ = ['InputError', 'openf', 'bcolors', 'GeneratedFileCleaner', 'match_extensions', 'matched_files_iter',
           'preprocess_file_and_save_result', 'compute_stats', 'add_dim3', 'processKernelLaunches', 'find_closure_group',
           'find_bracket_group', 'find_parentheses_group', 'replace_math_functions', 'hip_header_magic', 'replace_extern_shared',
           'get_hip_file_path', 'is_out_of_place', 'is_pytorch_file', 'is_cusparse_file', 'is_caffe2_gpu_file',
           'Trie', 'preprocessor', 'file_specific_replacement', 'file_add_header',
           'fix_static_global_kernels', 'extract_arguments', 'str2bool', 'hipify']
| |
| |
class InputError(Exception):
    """Exception raised for errors in the input."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message
| |
| def __str__(self): |
| return "{}: {}".format("Input error", self.message) |
| |
| |
| def openf(filename, mode): |
| return open(filename, mode, errors='ignore') |
| |
| |
| # Color coding for printing |
| class bcolors: |
| HEADER = '\033[95m' |
| OKBLUE = '\033[94m' |
| OKGREEN = '\033[92m' |
| WARNING = '\033[93m' |
| FAIL = '\033[91m' |
| ENDC = '\033[0m' |
| BOLD = '\033[1m' |
| UNDERLINE = '\033[4m' |
| |
| |
# To the programmer, the outputs of hipify are most likely intermediates.
# This class allows users of hipify to request a cleanup by running hipify
# and the compilation inside a `with` statement that instantiates this
# context manager with keep_intermediates=False.
# The main use case is cpp_extensions, specifically the load method.
# It is a good idea to keep intermediates (in case of errors, or to avoid
# recompiling unchanged files), but in cases where you don't want to keep
# them (e.g. in CI), this can be used to remove the files.
| class GeneratedFileCleaner: |
| """Context Manager to clean up generated files""" |
| def __init__(self, keep_intermediates=False): |
| self.keep_intermediates = keep_intermediates |
| self.files_to_clean = set() |
| self.dirs_to_clean = [] |
| |
| def __enter__(self): |
| return self |
| |
| def open(self, fn, *args, **kwargs): |
| if not os.path.exists(fn): |
| self.files_to_clean.add(os.path.abspath(fn)) |
| return open(fn, *args, **kwargs) |
| |
| def makedirs(self, dn, exist_ok=False): |
| parent, n = os.path.split(dn) |
| if not n: |
| parent, n = os.path.split(parent) |
| if parent and n and not os.path.exists(parent): |
| self.makedirs(parent, exist_ok=True) |
| if not os.path.isdir(dn) or not exist_ok: |
| os.mkdir(dn) |
| self.dirs_to_clean.append(os.path.abspath(dn)) |
| |
| def __exit__(self, type, value, traceback): |
| if not self.keep_intermediates: |
| for f in self.files_to_clean: |
| os.unlink(f) |
| for d in self.dirs_to_clean[::-1]: |
| os.rmdir(d) |
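
# A minimal usage sketch of GeneratedFileCleaner (hypothetical paths): files
# and directories created through the cleaner are removed on exit unless
# keep_intermediates=True.
#
#   with GeneratedFileCleaner(keep_intermediates=False) as clean_ctx:
#       clean_ctx.makedirs("build/hip", exist_ok=True)
#       with clean_ctx.open("build/hip/tmp.h", "w") as f:
#           f.write("// temporary\n")
#   # on exit the file and any directories the cleaner created are removed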
| |
| |
| def match_extensions(filename: str, extensions: Iterable) -> bool: |
| """Helper method to see if filename ends with certain extension""" |
| return any(filename.endswith(e) for e in extensions) |
| |
| |
| def _fnmatch(filepath, patterns): |
| return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns) |
| |
| |
| def matched_files_iter( |
| root_path: str, |
| includes: Iterable = (), |
| ignores: Iterable = (), |
| extensions: Iterable = (), |
| out_of_place_only: bool = False, |
| is_pytorch_extension: bool = False) -> Iterator[str]: |
| |
| exact_matches = set(includes) |
| |
| # This is a very rough heuristic; really, we want to avoid scanning |
| # any file which is not checked into source control, but this script |
| # needs to work even if you're in a Git or Hg checkout, so easier to |
| # just block the biggest time sinks that won't matter in the |
| # end. |
| for (abs_dirpath, dirs, filenames) in os.walk(root_path, topdown=True): |
| rel_dirpath = os.path.relpath(abs_dirpath, root_path) |
| if rel_dirpath == '.': |
            # Prune the biggest non-source directories in place so os.walk
            # skips them (the linear "in"/remove scans only run at the root).
| if ".git" in dirs: |
| dirs.remove(".git") |
| if "build" in dirs: |
| dirs.remove("build") |
| if "third_party" in dirs: |
| dirs.remove("third_party") |
| for filename in filenames: |
| filepath = os.path.join(abs_dirpath, filename) |
| rel_filepath = os.path.join(rel_dirpath, filename) |
| # We respect extensions, UNLESS you wrote the entire |
| # filename verbatim, in which case we always accept it |
| if ( |
| _fnmatch(filepath, includes) |
| and (not _fnmatch(filepath, ignores)) |
| and (match_extensions(filepath, extensions) or filepath in exact_matches) |
| ): |
| if not is_pytorch_extension: # for pytorch extensions, consider all files |
| if not is_pytorch_file(rel_filepath) and not is_caffe2_gpu_file(rel_filepath): |
| continue |
| if out_of_place_only and not is_out_of_place(rel_filepath): |
| continue |
| yield filepath |
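
# A minimal usage sketch (hypothetical paths): list candidate sources under an
# output tree, skipping .git/build/third_party at the root.
#
#   for f in matched_files_iter("/src/proj_amd", includes=("*",),
#                               extensions=(".cu", ".cuh"),
#                               is_pytorch_extension=True):
#       print(f)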
| |
| |
| def preprocess_file_and_save_result( |
| output_directory: str, |
| filepath: str, |
| all_files: Iterable, |
| header_include_dirs: Iterable, |
| stats: Dict[str, List], |
| hip_clang_launch: bool, |
| is_pytorch_extension: bool, |
| clean_ctx: GeneratedFileCleaner, |
| show_progress: bool) -> None: |
| result = preprocessor(output_directory, filepath, all_files, header_include_dirs, stats, |
| hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress) |
| |
| fin_path = os.path.abspath(os.path.join(output_directory, filepath)) |
| # Show what happened |
| if show_progress and "ignored" not in str(result["status"]): |
| print( |
| fin_path, "->", |
| result["hipified_path"], result["status"], flush=True) |
| |
| HIPIFY_FINAL_RESULT[fin_path] = result |
| |
| |
| def compute_stats(stats): |
| unsupported_calls = {cuda_call for (cuda_call, _filepath) in stats["unsupported_calls"]} |
| |
| # Print the number of unsupported calls |
| print("Total number of unsupported CUDA function calls: {0:d}".format(len(unsupported_calls))) |
| |
| # Print the list of unsupported calls |
| print(", ".join(unsupported_calls)) |
| |
| # Print the number of kernel launches |
| print("\nTotal number of replaced kernel launches: {0:d}".format(len(stats["kernel_launches"]))) |
| |
| |
| def add_dim3(kernel_string, cuda_kernel): |
    '''Adds dim3() around the grid and block dimension arguments of the kernel launch.'''
| count = 0 |
| closure = 0 |
| kernel_string = kernel_string.replace("<<<", "").replace(">>>", "") |
| arg_locs: List[Dict[str, int]] = [{} for _ in range(2)] |
| arg_locs[count]['start'] = 0 |
| for ind, c in enumerate(kernel_string): |
| if count > 1: |
| break |
| if c == "(": |
| closure += 1 |
| elif c == ")": |
| closure -= 1 |
| if (c == "," or ind == len(kernel_string) - 1) and closure == 0: |
| arg_locs[count]['end'] = ind + (c != ",") |
| count += 1 |
| if count < 2: |
| arg_locs[count]['start'] = ind + 1 |
| |
| first_arg_raw = kernel_string[arg_locs[0]['start']:arg_locs[0]['end'] + 1] |
| second_arg_raw = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']] |
| |
| first_arg_clean = kernel_string[arg_locs[0]['start']:arg_locs[0]['end']].replace("\n", "").strip(" ") |
| second_arg_clean = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']].replace("\n", "").strip(" ") |
| |
| first_arg_dim3 = "dim3({})".format(first_arg_clean) |
| second_arg_dim3 = "dim3({})".format(second_arg_clean) |
| |
| first_arg_raw_dim3 = first_arg_raw.replace(first_arg_clean, first_arg_dim3) |
| second_arg_raw_dim3 = second_arg_raw.replace(second_arg_clean, second_arg_dim3) |
| cuda_kernel = cuda_kernel.replace(first_arg_raw + second_arg_raw, first_arg_raw_dim3 + second_arg_raw_dim3) |
| return cuda_kernel |
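
# Illustrative transformation (hypothetical kernel launch): the grid and block
# arguments are wrapped in dim3(); everything else is preserved.
#
#   add_dim3("<<<grid, block, 0, stream>>>",
#            "kern<<<grid, block, 0, stream>>>(arg)")
#   # -> "kern<<<dim3(grid), dim3(block), 0, stream>>>(arg)"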
| |
| |
| RE_KERNEL_LAUNCH = re.compile(r'([ ]+)(detail?)::[ ]+\\\n[ ]+') |
| |
| |
| def processKernelLaunches(string, stats): |
| """ Replace the CUDA style Kernel launches with the HIP style kernel launches.""" |
| # Concat the namespace with the kernel names. (Find cleaner way of doing this later). |
| string = RE_KERNEL_LAUNCH.sub(lambda inp: "{0}{1}::".format(inp.group(1), inp.group(2)), string) |
| |
| def grab_method_and_template(in_kernel): |
| # The positions for relevant kernel components. |
| pos = { |
| "kernel_launch": {"start": in_kernel["start"], "end": in_kernel["end"]}, |
| "kernel_name": {"start": -1, "end": -1}, |
| "template": {"start": -1, "end": -1} |
| } |
| |
| # Count for balancing template |
| count = {"<>": 0} |
| |
| # Status for whether we are parsing a certain item. |
| START = 0 |
| AT_TEMPLATE = 1 |
| AFTER_TEMPLATE = 2 |
| AT_KERNEL_NAME = 3 |
| |
| status = START |
| |
| # Parse the string character by character |
| for i in range(pos["kernel_launch"]["start"] - 1, -1, -1): |
| char = string[i] |
| |
| # Handle Templating Arguments |
| if status == START or status == AT_TEMPLATE: |
| if char == ">": |
| if status == START: |
| status = AT_TEMPLATE |
| pos["template"]["end"] = i |
| count["<>"] += 1 |
| |
| if char == "<": |
| count["<>"] -= 1 |
| if count["<>"] == 0 and (status == AT_TEMPLATE): |
| pos["template"]["start"] = i |
| status = AFTER_TEMPLATE |
| |
| # Handle Kernel Name |
| if status != AT_TEMPLATE: |
| if string[i].isalnum() or string[i] in {'(', ')', '_', ':', '#'}: |
| if status != AT_KERNEL_NAME: |
| status = AT_KERNEL_NAME |
| pos["kernel_name"]["end"] = i |
| |
| # Case: Kernel name starts the string. |
| if i == 0: |
| pos["kernel_name"]["start"] = 0 |
| |
| # Finished |
| return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])] |
| |
| else: |
| # Potential ending point if we're already traversing a kernel's name. |
| if status == AT_KERNEL_NAME: |
| pos["kernel_name"]["start"] = i |
| |
| # Finished |
| return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])] |
| |
| def find_kernel_bounds(string): |
| """Finds the starting and ending points for all kernel launches in the string.""" |
| kernel_end = 0 |
| kernel_positions = [] |
| |
        # Continue until we cannot find any more kernels.
        while string.find("<<<", kernel_end) != -1:
            # Get kernel starting position (starting from the previous ending point)
            kernel_start = string.find("<<<", kernel_end)

            # Get kernel ending position (adjust end point past the >>>)
            kernel_end = string.find(">>>", kernel_start)
            if kernel_end == -1:
                raise InputError("no kernel end found")
            kernel_end += 3
| |
| # Add to list of traversed kernels |
| kernel_positions.append({"start": kernel_start, "end": kernel_end, |
| "group": string[kernel_start: kernel_end]}) |
| |
| return kernel_positions |
| |
    # Mask comments and string literals in the code so that find_kernel_bounds
    # does not wrongly capture kernel launches inside them.
    # Masked characters are replaced with "x" to preserve positions.
| def mask_comments(string): |
| in_comment = '' |
| prev_c = '' |
| new_string = '' |
| for c in string: |
| if in_comment == '': |
| # Outside comments |
| if c == '/' and prev_c == '/': |
| in_comment = '//' |
| elif c == '*' and prev_c == '/': |
| in_comment = '/*' |
| elif c == '"' and prev_c != '\\' and prev_c != "'": |
| in_comment = '"' |
| elif in_comment == '//': |
| # In // xxx |
| if c == '\r' or c == '\n': |
| in_comment = '' |
| elif in_comment == '/*': |
| # In /* xxx */ |
| if c == '/' and prev_c == '*': |
| in_comment = '' |
| elif in_comment == '"': |
| # In "" |
| if c == '"' and prev_c != '\\': |
| in_comment = '' |
| prev_c = c |
| if in_comment == '': |
| new_string += c |
| else: |
| new_string += 'x' |
| return new_string |
| |
| # Grab positional ranges of all kernel launches |
| get_kernel_positions = list(find_kernel_bounds(mask_comments(string))) |
| output_string = string |
| |
| # Replace each CUDA kernel with a HIP kernel. |
| for kernel in get_kernel_positions: |
| # Get kernel components |
| params = grab_method_and_template(kernel) |
| |
| # Find parenthesis after kernel launch |
| parenthesis = string.find("(", kernel["end"]) |
| |
| # Extract cuda kernel |
| cuda_kernel = string[params[0]["start"]:parenthesis + 1] |
| kernel_string = string[kernel['start']:kernel['end']] |
| end_param_index = 0 if params[1]['end'] == -1 else 1 |
| kernel_name_with_template = string[params[0]['start']:params[end_param_index]['end'] + 1] |
| cuda_kernel_dim3 = add_dim3(kernel_string, cuda_kernel) |
        # Keep the number of kernel launch params consistent (grid dims, group dims, dynamic shared size, stream)
| num_klp = len(extract_arguments(0, kernel["group"].replace("<<<", "(").replace(">>>", ")"))) |
| |
| hip_kernel = "hipLaunchKernelGGL(" + cuda_kernel_dim3[0:-1].replace( |
| ">>>", ", 0" * (4 - num_klp) + ">>>").replace("<<<", ", ").replace( |
| ">>>", ", ").replace(kernel_name_with_template, "(" + kernel_name_with_template + ")") |
| |
| # Replace cuda kernel with hip kernel |
| output_string = output_string.replace(cuda_kernel, hip_kernel) |
| |
| # Update the statistics |
| stats["kernel_launches"].append(hip_kernel) |
| |
| return output_string |
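
# Illustrative rewrite (hypothetical kernel): hipLaunchKernelGGL takes exactly
# four launch parameters, so launches with fewer are padded with 0.
#
#   stats = {"kernel_launches": [], "unsupported_calls": []}
#   processKernelLaunches("kern<<<grid, block>>>(arg);", stats)
#   # -> "hipLaunchKernelGGL((kern), dim3(grid), dim3(block), 0, 0, arg);"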
| |
| |
| def find_closure_group(input_string, start, group): |
| """Generalization for finding a balancing closure group |
| |
| if group = ["(", ")"], then finds the first balanced parentheses. |
| if group = ["{", "}"], then finds the first balanced bracket. |
| |
| Given an input string, a starting position in the input string, and the group type, |
| find_closure_group returns the positions of group[0] and group[1] as a tuple. |
| |
| Example: |
| >>> find_closure_group("(hi)", 0, ["(", ")"]) |
| (0, 3) |
| """ |
| |
| inside_parenthesis = False |
| parens = 0 |
| pos = start |
| p_start, p_end = -1, -1 |
| |
| while pos < len(input_string): |
| if input_string[pos] == group[0]: |
| if inside_parenthesis is False: |
| inside_parenthesis = True |
| parens = 1 |
| p_start = pos |
| else: |
| parens += 1 |
| elif input_string[pos] == group[1] and inside_parenthesis: |
| parens -= 1 |
| |
| if parens == 0: |
| p_end = pos |
| return p_start, p_end |
| |
| pos += 1 |
| return None, None |
| |
| |
def find_bracket_group(input_string, start):
    """Finds the first balanced brace group {...}."""
    return find_closure_group(input_string, start, group=["{", "}"])


def find_parentheses_group(input_string, start):
    """Finds the first balanced parentheses group (...)."""
    return find_closure_group(input_string, start, group=["(", ")"])
| |
| |
| RE_ASSERT = re.compile(r"\bassert[ ]*\(") |
| |
| |
| def replace_math_functions(input_string): |
| """FIXME: Temporarily replace std:: invocations of math functions |
| with non-std:: versions to prevent linker errors NOTE: This |
| can lead to correctness issues when running tests, since the |
| correct version of the math function (exp/expf) might not get |
| called. Plan is to remove this function once HIP supports |
| std:: math function calls inside device code |
| |
| """ |
| output_string = input_string |
| for func in MATH_TRANSPILATIONS: |
| output_string = output_string.replace(r'{}('.format(func), '{}('.format(MATH_TRANSPILATIONS[func])) |
| |
| return output_string |
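
# Illustrative, assuming MATH_TRANSPILATIONS contains an entry such as
# "std::max" -> "::max":
#
#   replace_math_functions("auto m = std::max(a, b);")
#   # -> "auto m = ::max(a, b);"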
| |
| |
| RE_SYNCTHREADS = re.compile(r":?:?\b(__syncthreads)\b(\w*\()") |
| |
| |
| def hip_header_magic(input_string): |
| """If the file makes kernel builtin calls and does not include the cuda_runtime.h header, |
| then automatically add an #include to match the "magic" includes provided by NVCC. |
| TODO: |
| Update logic to ignore cases where the cuda_runtime.h is included by another file. |
| """ |
| |
| # Copy the input. |
| output_string = input_string |
| |
| # Check if one of the following headers is already included. |
| headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"] |
| if any(re.search(r'#include ("{0}"|<{0}>)'.format(ext), output_string) for ext in headers): |
| return output_string |
| |
| # Rough logic to detect if we're inside device code |
| hasDeviceLogic: int |
| hasDeviceLogic = "hipLaunchKernelGGL" in output_string |
| hasDeviceLogic += "__global__" in output_string |
| hasDeviceLogic += "__shared__" in output_string |
| hasDeviceLogic += RE_SYNCTHREADS.search(output_string) is not None |
| |
| # If device logic found, provide the necessary header. |
| if hasDeviceLogic: |
| output_string = '#include "hip/hip_runtime.h"\n' + input_string |
| |
| return output_string |
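
# Illustrative: a source containing device code such as "__global__ void k();"
# and no hip runtime include comes back from hip_header_magic with
# '#include "hip/hip_runtime.h"' prepended.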
| |
| |
| RE_EXTERN_SHARED = re.compile(r"extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;") |
| |
| |
| def replace_extern_shared(input_string): |
| """Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead. |
| https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__ |
| Example: |
| "extern __shared__ char smemChar[];" => "HIP_DYNAMIC_SHARED( char, smemChar)" |
| "extern __shared__ unsigned char smem[];" => "HIP_DYNAMIC_SHARED( unsigned char, my_smem)" |
| """ |
| output_string = input_string |
| output_string = RE_EXTERN_SHARED.sub( |
| lambda inp: "HIP_DYNAMIC_SHARED({0} {1}, {2})".format( |
| inp.group(1) or "", inp.group(2), inp.group(3)), output_string) |
| |
| return output_string |
| |
| |
| def get_hip_file_path(rel_filepath, is_pytorch_extension=False): |
| """ |
| Returns the new name of the hipified file |
| """ |
| # At the moment, some PyTorch source files are HIPified in place. The predicate |
| # is_out_of_place tells us if this is the case or not. |
| assert not os.path.isabs(rel_filepath) |
| if not is_pytorch_extension and not is_out_of_place(rel_filepath): |
| return rel_filepath |
| |
| dirpath, filename = os.path.split(rel_filepath) |
| root, ext = os.path.splitext(filename) |
| |
| # Here's the plan: |
| # |
| # In general, we need to disambiguate the HIPified filename so that |
| # it gets a different name from the original filename, so |
| # that we don't overwrite the original file |
| # |
| # There's a lot of different naming conventions across PyTorch |
| # and Caffe2, but the general recipe is to convert occurrences |
| # of cuda/gpu to hip, and add hip if there are no occurrences |
| # of cuda/gpu anywhere. |
| # |
| # Concretely, we do the following: |
| # |
| # - If there is a directory component named "cuda", replace |
| # it with "hip", AND |
| # |
| # - If the file name contains "CUDA", replace it with "HIP", AND |
| # |
    # - ALWAYS replace '.cu' with '.hip', because those files
    #   contain CUDA kernels that need to be hipified and processed with
    #   the hip compiler
| # |
| # - If we are not hipifying a PyTorch extension, and the parent |
| # directory name did not change as a result of the above |
| # transformations, insert "hip" in the file path |
| # as the direct parent folder of the file |
| # |
| # - If we are hipifying a PyTorch extension, and the parent directory |
| # name as well as the filename (incl. extension) did not change as |
| # a result of the above transformations, insert "_hip" in the filename |
| # |
| # This isn't set in stone; we might adjust this to support other |
| # naming conventions. |
| |
| if ext == '.cu': |
| ext = '.hip' |
| |
| orig_filename = filename |
| orig_dirpath = dirpath |
| |
| dirpath = dirpath.replace('cuda', 'hip') |
| dirpath = dirpath.replace('CUDA', 'HIP') |
| dirpath = dirpath.replace('THC', 'THH') |
| |
| root = root.replace('cuda', 'hip') |
| root = root.replace('CUDA', 'HIP') |
| # Special case to handle caffe2/core/THCCachingAllocator |
| if dirpath != "caffe2/core": |
| root = root.replace('THC', 'THH') |
| |
| if not is_pytorch_extension and dirpath == orig_dirpath: |
| dirpath = os.path.join(dirpath, 'hip') |
| |
| if is_pytorch_extension and dirpath == orig_dirpath and (root + ext) == orig_filename: |
| root = root + "_hip" |
| |
| return os.path.join(dirpath, root + ext) |
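
# Illustrative renames (hypothetical paths):
#   aten/src/ATen/native/cuda/Foo.cu -> aten/src/ATen/native/hip/Foo.hip
#   my_ext/kernel.cu (is_pytorch_extension=True) -> my_ext/kernel.hip
#   my_ext/helper.h  (is_pytorch_extension=True) -> my_ext/helper_hip.h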
| |
| |
| def is_out_of_place(rel_filepath): |
| assert not os.path.isabs(rel_filepath) |
| if rel_filepath.startswith("torch/"): |
| return False |
| if rel_filepath.startswith("tools/autograd/templates/"): |
| return False |
| return True |
| |
| |
| # Keep this synchronized with includes/ignores in build_amd.py |
| def is_pytorch_file(rel_filepath): |
| assert not os.path.isabs(rel_filepath) |
| if rel_filepath.startswith("aten/"): |
| if rel_filepath.startswith("aten/src/ATen/core/"): |
| return False |
| return True |
| if rel_filepath.startswith("torch/"): |
| return True |
| if rel_filepath.startswith("tools/autograd/templates/"): |
| return True |
| return False |
| |
| |
| def is_cusparse_file(rel_filepath): |
| if is_pytorch_file(rel_filepath): |
| return "sparse" in rel_filepath.lower() |
| return False |
| |
| |
| def is_caffe2_gpu_file(rel_filepath): |
| assert not os.path.isabs(rel_filepath) |
| if rel_filepath.startswith("c10/cuda"): |
| return True |
| filename = os.path.basename(rel_filepath) |
| _, ext = os.path.splitext(filename) |
| return ('gpu' in filename or ext in ['.cu', '.cuh']) and ('cudnn' not in filename) |
| |
| |
| # Cribbed from https://stackoverflow.com/questions/42742810/speed-up-millions-of-regex-replacements-in-python-3/42789508#42789508 |
| class Trie(): |
| """Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern. |
| The corresponding Regex should match much faster than a simple Regex union.""" |
| |
| def __init__(self): |
| self.data = {} |
| |
| def add(self, word): |
| ref = self.data |
| for char in word: |
            ref = ref.setdefault(char, {})
| ref[''] = 1 |
| |
| def dump(self): |
| return self.data |
| |
| def quote(self, char): |
| return re.escape(char) |
| |
| def _pattern(self, pData): |
| data = pData |
| if "" in data and len(data.keys()) == 1: |
| return None |
| |
| alt = [] |
| cc = [] |
| q = 0 |
| for char in sorted(data.keys()): |
            if isinstance(data[char], dict):
                try:
                    recurse = self._pattern(data[char])
                    alt.append(self.quote(char) + recurse)
                except Exception:
                    # _pattern returns None at a word end; "str + None" raises
                    # TypeError, which marks char as a single-character leaf.
                    cc.append(self.quote(char))
| else: |
| q = 1 |
| cconly = not len(alt) > 0 |
| |
| if len(cc) > 0: |
| if len(cc) == 1: |
| alt.append(cc[0]) |
| else: |
| alt.append('[' + ''.join(cc) + ']') |
| |
| if len(alt) == 1: |
| result = alt[0] |
| else: |
| result = "(?:" + "|".join(alt) + ")" |
| |
| if q: |
| if cconly: |
| result += "?" |
| else: |
| result = "(?:%s)?" % result |
| return result |
| |
| def pattern(self): |
| return self._pattern(self.dump()) |
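
# A minimal sketch of how the tries below are used (hypothetical words):
#
#   trie = Trie()
#   trie.add("cudaMalloc")
#   trie.add("cudaMemcpy")
#   trie.pattern()  # -> 'cudaM(?:alloc|emcpy)'
#
# The single compiled pattern matches much faster than a plain union of words.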
| |
| |
| CAFFE2_TRIE = Trie() |
| CAFFE2_MAP = {} |
| PYTORCH_TRIE = Trie() |
| PYTORCH_MAP: Dict[str, object] = {} |
| |
| # In PyTorch, we map cuBLAS->rocBLAS and cuSPARSE->hipSPARSE. Note the prefix, roc versus hip. |
| # The 'hip' APIs offer a more direct CUDA-friendly mapping, but calling rocBLAS directly has better performance. |
| # Unfortunately, the roc* types and hip* types differ, i.e., rocblas_float_complex versus hipComplex. |
| # In the case of SPARSE, we must use the hip types for complex instead of the roc types, |
| # but the pytorch mappings assume roc. Therefore, we create a new SPARSE mapping that has a higher priority. |
| # Its mappings will trigger first, and only when a miss occurs will the lower-priority pytorch mapping take place. |
| # When a file contains "sparse" in the filename, a mapping marked with API_SPARSE is preferred over other choices. |
| PYTORCH_SPARSE_MAP = {} |
| |
| for mapping in CUDA_TO_HIP_MAPPINGS: |
| assert isinstance(mapping, Mapping) |
| for src, value in mapping.items(): |
| dst = value[0] |
| meta_data = value[1:] |
| if constants.API_CAFFE2 not in meta_data: |
| PYTORCH_TRIE.add(src) |
| # if src is already in PYTORCH_MAP and dst belongs to API_SPARSE |
| # do not overwrite PYTORCH_MAP, store dst separately |
| if constants.API_SPARSE in meta_data and PYTORCH_MAP.get(src, ""): |
| PYTORCH_SPARSE_MAP[src] = dst |
| else: |
| PYTORCH_MAP[src] = dst |
| if constants.API_PYTORCH not in meta_data: |
| CAFFE2_TRIE.add(src) |
| CAFFE2_MAP[src] = dst |
| RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.pattern()) |
| RE_PYTORCH_PREPROCESSOR = re.compile(r'(?<=\W)({0})(?=\W)'.format(PYTORCH_TRIE.pattern())) |
| |
| RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"') |
| RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>') |
| RE_THC_GENERIC_FILE = re.compile(r'#define THC_GENERIC_FILE "([^"]+)"') |
| RE_CU_SUFFIX = re.compile(r'\.cu\b') # be careful not to pick up .cuh |
| |
| """ |
| Returns a dict with the following keys: |
| "hipified_path" : absolute path of hipified source file |
| "status" : "ok" if hipified file was written out |
| "skipped" if an identical hipified file already existed or hipified file couldn't be written out |
| "ignored" if the source file was a hipified file itself or not meant to be hipified |
| """ |
| |
| |
| def preprocessor( |
| output_directory: str, |
| filepath: str, |
| all_files: Iterable, |
| header_include_dirs: Iterable, |
| stats: Dict[str, List], |
| hip_clang_launch: bool, |
| is_pytorch_extension: bool, |
| clean_ctx: GeneratedFileCleaner, |
| show_progress: bool) -> HipifyResult: |
| """ Executes the CUDA -> HIP conversion on the specified file. """ |
| if filepath not in all_files: |
| return {"hipified_path": None, "status": "[ignored, not to be hipified]"} |
| |
| fin_path = os.path.abspath(os.path.join(output_directory, filepath)) |
| rel_filepath = os.path.relpath(filepath, output_directory) |
| |
| with open(fin_path, 'r', encoding='utf-8') as fin: |
| if fin.readline() == HIPIFY_C_BREADCRUMB: |
| return {"hipified_path": None, "status": "[ignored, input is hipified output]"} |
| fin.seek(0) |
| output_source = fin.read() |
| |
| orig_output_source = output_source |
| |
| # get_hip_file_path needs a relative path to work correctly |
| fout_path = os.path.abspath(os.path.join(output_directory, get_hip_file_path(rel_filepath, is_pytorch_extension))) |
| if not os.path.exists(os.path.dirname(fout_path)): |
| clean_ctx.makedirs(os.path.dirname(fout_path)) |
| |
    # NOTE: unsupported_calls statistics reporting is currently broken
| def pt_repl(m): |
| return PYTORCH_MAP[m.group(0)] |
| |
| def pt_sparse_repl(m): |
| # checks SPARSE map first, and if a miss occurs, falls back to pytorch mappings |
| return PYTORCH_SPARSE_MAP.get(m.group(0), pt_repl(m)) |
| |
| if is_pytorch_extension: |
| output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source) |
| else: |
| if is_cusparse_file(rel_filepath): |
| output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_sparse_repl, output_source) |
| elif is_pytorch_file(rel_filepath): |
| output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source) |
| else: |
| def c2_repl(m): |
| return CAFFE2_MAP[m.group(0)] |
| output_source = RE_CAFFE2_PREPROCESSOR.sub(c2_repl, output_source) |
| |
| # Header rewrites |
| def mk_repl(templ, include_current_dir=True): |
| def repl(m): |
| f = m.group(1) |
| dirpath, filename = os.path.split(f) |
| if ( |
| f.startswith("ATen/cuda") |
| or f.startswith("ATen/native/cuda") |
| or f.startswith("ATen/native/nested/cuda") |
| or f.startswith("ATen/native/quantized/cuda") |
| or f.startswith("ATen/native/sparse/cuda") |
| or f.startswith("ATen/native/transformers/cuda") |
| or f.startswith("THC/") |
| or (f.startswith("THC") and not f.startswith("THCP")) |
| ): |
| return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension)) |
| # if filename is one of the files being hipified for this extension |
| if (is_pytorch_extension and any(s.endswith(filename) for s in all_files)): |
| header_dir = None |
| header_filepath = None |
            # If include_current_dir is True, look first in the same dir as the including source file
| if include_current_dir: |
| header_dir_to_check = os.path.dirname(fin_path) |
| header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f)) |
| if os.path.exists(header_path_to_check): |
| header_dir = header_dir_to_check |
| header_filepath = header_path_to_check |
| # If not found, look in include dirs one by one and first match wins |
| if header_filepath is None: |
| for header_include_dir in header_include_dirs: |
| header_dir_to_check = os.path.join(output_directory, header_include_dir) |
| header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f)) |
| if os.path.exists(header_path_to_check): |
| header_dir = header_dir_to_check |
| header_filepath = header_path_to_check |
| # If header file not found, keep as is |
| if header_filepath is None: |
| return m.group(0) |
| # Hipify header file first if needed |
| if header_filepath not in HIPIFY_FINAL_RESULT: |
| preprocess_file_and_save_result(output_directory, |
| header_filepath, |
| all_files, header_include_dirs, stats, hip_clang_launch, |
| is_pytorch_extension, clean_ctx, show_progress) |
| hipified_header_filepath = HIPIFY_FINAL_RESULT[header_filepath]["hipified_path"] |
| return templ.format(os.path.relpath(hipified_header_filepath if hipified_header_filepath is not None |
| else header_filepath, header_dir)) |
| |
| return m.group(0) |
| return repl |
| output_source = RE_QUOTE_HEADER.sub(mk_repl('#include "{0}"', True), output_source) |
| output_source = RE_ANGLE_HEADER.sub(mk_repl('#include <{0}>', False), output_source) |
| output_source = RE_THC_GENERIC_FILE.sub(mk_repl('#define THC_GENERIC_FILE "{0}"'), output_source) |
| |
| # CMakeLists.txt rewrites |
| if filepath.endswith('CMakeLists.txt'): |
| output_source = output_source.replace('CUDA', 'HIP') |
| output_source = output_source.replace('THC', 'THH') |
| output_source = RE_CU_SUFFIX.sub('.hip', output_source) |
| |
| # Perform Kernel Launch Replacements |
| if not hip_clang_launch: |
| output_source = processKernelLaunches(output_source, stats) |
| |
| # Replace std:: with non-std:: versions |
| if (filepath.endswith(".cu") or filepath.endswith(".cuh")) and "PowKernel" not in filepath: |
| output_source = replace_math_functions(output_source) |
| |
| # Include header if device code is contained. |
| output_source = hip_header_magic(output_source) |
| |
| # Replace the extern __shared__ |
| # NOTE: No longer needed after transition from hcc to hipclang. |
| # output_source = replace_extern_shared(output_source) |
| |
| # Don't write out identical hipified files for extensions if dirpath has not changed |
| if ( |
| is_pytorch_extension |
| and orig_output_source == output_source |
| and os.path.dirname(fin_path) == os.path.dirname(fout_path) |
| ): |
| return {"hipified_path": fin_path, "status": "[skipped, no changes]"} |
| |
| # Add hipify breadcrumb for C-style files to avoid re-hipification |
| if fin_path != fout_path and match_extensions(fin_path, (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".hpp")): |
| output_source = HIPIFY_C_BREADCRUMB + output_source |
| |
| do_write = True |
| if os.path.exists(fout_path): |
| with open(fout_path, 'r', encoding='utf-8') as fout_old: |
| do_write = fout_old.read() != output_source |
| if do_write: |
| try: |
| with clean_ctx.open(fout_path, 'w', encoding='utf-8') as fout: |
| fout.write(output_source) |
| return {"hipified_path": fout_path, "status": "[ok]"} |
| except PermissionError as e: |
| print(f"{bcolors.WARNING}Failed to save {fout_path} with \"{e.strerror}\", leaving {fin_path} unchanged.{bcolors.ENDC}", |
| file=sys.stderr) |
| return {"hipified_path": fin_path, "status": "[skipped, no permissions]"} |
| else: |
| return {"hipified_path": fout_path, "status": "[skipped, already hipified]"} |
| |
| |
| def file_specific_replacement(filepath, search_string, replace_string, strict=False): |
| with openf(filepath, "r+") as f: |
| contents = f.read() |
| if strict: |
| contents = re.sub(r'\b({0})\b'.format(re.escape(search_string)), lambda x: replace_string, contents) |
| else: |
| contents = contents.replace(search_string, replace_string) |
| f.seek(0) |
| f.write(contents) |
| f.truncate() |
| |
| |
| def file_add_header(filepath, header): |
| with openf(filepath, "r+") as f: |
| contents = f.read() |
| if header[0] != "<" and header[-1] != ">": |
| header = '"{0}"'.format(header) |
| contents = ('#include {0} \n'.format(header)) + contents |
| f.seek(0) |
| f.write(contents) |
| f.truncate() |
| |
| |
| def fix_static_global_kernels(in_txt): |
| """Static global kernels in HIP results in a compilation error.""" |
| in_txt = in_txt.replace(" __global__ static", "__global__") |
| return in_txt |
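
# Illustrative:
#   fix_static_global_kernels(" __global__ static void k() {}")
#   # -> "__global__ void k() {}"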
| |
| |
| RE_INCLUDE = re.compile(r"#include .*\n") |
| |
| |
| def extract_arguments(start, string): |
| """ Return the list of arguments in the upcoming function parameter closure. |
| Example: |
| string (input): '(blocks, threads, 0, THCState_getCurrentStream(state))' |
| arguments (output): |
| '[{'start': 1, 'end': 7}, |
| {'start': 8, 'end': 16}, |
| {'start': 17, 'end': 19}, |
| {'start': 20, 'end': 53}]' |
| """ |
| |
| arguments = [] |
| closures = { |
| "<": 0, |
| "(": 0 |
| } |
| current_position = start |
| argument_start_pos = current_position + 1 |
| |
| # Search for final parenthesis |
| while current_position < len(string): |
| if string[current_position] == "(": |
| closures["("] += 1 |
| elif string[current_position] == ")": |
| closures["("] -= 1 |
| elif string[current_position] == "<": |
| closures["<"] += 1 |
| elif string[current_position] == ">" and string[current_position - 1] != "-" and closures["<"] > 0: |
| closures["<"] -= 1 |
| |
| # Finished all arguments |
| if closures["("] == 0 and closures["<"] == 0: |
| # Add final argument |
| arguments.append({"start": argument_start_pos, "end": current_position}) |
| break |
| |
| # Finished current argument |
| if closures["("] == 1 and closures["<"] == 0 and string[current_position] == ",": |
| arguments.append({"start": argument_start_pos, "end": current_position}) |
| argument_start_pos = current_position + 1 |
| |
| current_position += 1 |
| |
| return arguments |
| |
| |
| def str2bool(v): |
| """ArgumentParser doesn't support type=bool. Thus, this helper method will convert |
| from possible string types to True / False.""" |
| if v.lower() in ('yes', 'true', 't', 'y', '1'): |
| return True |
| elif v.lower() in ('no', 'false', 'f', 'n', '0'): |
| return False |
| else: |
| raise argparse.ArgumentTypeError('Boolean value expected.') |
| |
| |
| def hipify( |
| project_directory: str, |
| show_detailed: bool = False, |
| extensions: Iterable = (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"), |
| header_extensions: Iterable = (".cuh", ".h", ".hpp"), |
| output_directory: str = "", |
| header_include_dirs: Iterable = (), |
| includes: Iterable = ('*',), |
| extra_files: Iterable = (), |
| out_of_place_only: bool = False, |
| ignores: Iterable = (), |
| show_progress: bool = True, |
| hip_clang_launch: bool = False, |
| is_pytorch_extension: bool = False, |
| hipify_extra_files_only: bool = False, |
| clean_ctx: Optional[GeneratedFileCleaner] = None |
| ) -> HipifyFinalResult: |
| if project_directory == "": |
| project_directory = os.getcwd() |
| |
| # Verify the project directory exists. |
| if not os.path.exists(project_directory): |
| print("The project folder specified does not exist.") |
| sys.exit(1) |
| |
| # If no output directory, provide a default one. |
| if not output_directory: |
| project_directory.rstrip("/") |
| output_directory = project_directory + "_amd" |
| |
| if project_directory != output_directory: |
| includes = [include.replace(project_directory, output_directory) for include in includes] |
| ignores = [ignore.replace(project_directory, output_directory) for ignore in ignores] |
| |
| # Copy from project directory to output directory if not done already. |
| if not os.path.exists(output_directory): |
| shutil.copytree(project_directory, output_directory) |
| |
| all_files = list(matched_files_iter(output_directory, includes=includes, |
| ignores=ignores, extensions=extensions, |
| out_of_place_only=out_of_place_only, |
| is_pytorch_extension=is_pytorch_extension)) |
| all_files_set = set(all_files) |
| for f in extra_files: |
| if not os.path.isabs(f): |
| f = os.path.join(output_directory, f) |
| if f not in all_files_set: |
| all_files.append(f) |
| |
    # List all files in header_include_dirs to ensure they are hipified
| for header_include_dir in header_include_dirs: |
| if os.path.isabs(header_include_dir): |
| header_include_dir_path = Path(header_include_dir) |
| else: |
| header_include_dir_path = Path(os.path.join(output_directory, header_include_dir)) |
| for path in header_include_dir_path.rglob('*'): |
| if ( |
| path.is_file() |
| and _fnmatch(str(path), includes) |
| and (not _fnmatch(str(path), ignores)) |
| and match_extensions(path.name, header_extensions) |
| ): |
| all_files.append(str(path)) |
| |
| if clean_ctx is None: |
| clean_ctx = GeneratedFileCleaner(keep_intermediates=True) |
| |
| # Preprocessing statistics. |
| stats: Dict[str, List] = {"unsupported_calls": [], "kernel_launches": []} |
| |
| for filepath in (all_files if not hipify_extra_files_only else extra_files): |
| preprocess_file_and_save_result(output_directory, filepath, all_files, header_include_dirs, |
| stats, hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress) |
| |
| print(bcolors.OKGREEN + "Successfully preprocessed all matching files." + bcolors.ENDC, file=sys.stderr) |
| |
| # Show detailed summary |
| if show_detailed: |
| compute_stats(stats) |
| |
| return HIPIFY_FINAL_RESULT |
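
# A minimal invocation sketch (hypothetical paths; keyword values are
# illustrative):
#
#   results = hipify(project_directory="/src/my_project",
#                    output_directory="/src/my_project_amd",
#                    is_pytorch_extension=True,
#                    show_progress=False)
#   for src, res in results.items():
#       print(src, "->", res["hipified_path"], res["status"])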