| /* |
| * cl_kernel.cpp - CL kernel |
| * |
| * Copyright (c) 2015 Intel Corporation |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| * Author: Wind Yuan <[email protected]> |
| */ |
| |
| #include "cl_kernel.h" |
| #include "cl_context.h" |
| #include "cl_device.h" |
| #include "file_handle.h" |
| |
| #include <sys/stat.h> |
| |
| #define ENABLE_DEBUG_KERNEL 0 |
| |
| #define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0 |
| |
| namespace XCam { |
| |
| CLKernel::KernelMap CLKernel::_kernel_map; |
| Mutex CLKernel::_kernel_map_mutex; |
| |
| static char* |
| default_cache_path () { |
| static char path[XCAM_MAX_STR_SIZE] = {0}; |
| snprintf ( |
| path, XCAM_MAX_STR_SIZE - 1, |
| "%s/%s", std::getenv ("HOME"), ".xcam/"); |
| |
| return path; |
| } |
| |
| const char* CLKernel::_kernel_cache_path = default_cache_path (); |
| |
| CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name) |
| : _name (NULL) |
| , _kernel_id (NULL) |
| , _context (context) |
| { |
| XCAM_ASSERT (context.ptr ()); |
| //XCAM_ASSERT (name); |
| |
| if (name) |
| _name = strndup (name, XCAM_MAX_STR_SIZE); |
| |
| set_default_work_size (); |
| |
| XCAM_OBJ_PROFILING_INIT; |
| } |
| |
| CLKernel::~CLKernel () |
| { |
| destroy (); |
| if (_name) |
| xcam_free (_name); |
| } |
| |
| void |
| CLKernel::destroy () |
| { |
| if (!_parent_kernel.ptr ()) |
| _context->destroy_kernel_id (_kernel_id); |
| } |
| |
/*
 * Fold a string into an 8-byte digest by XOR-ing it in 8-byte strides:
 * byte j of the input is XOR-ed into key_id[j % 8].
 *
 * @str     input bytes; NULL yields an all-zero digest
 * @len     number of bytes to fold; 0 means "use strlen (str)"
 * @key_id  receives the 8 digest bytes (previous content is overwritten)
 *
 * The chunks are fetched with memcpy instead of dereferencing a casted
 * uint32_t pointer, so the input needs no particular alignment and no
 * aliasing rule is violated. The produced bytes do not depend on endianness.
 */
static void
get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
{
    uint64_t folded = 0;
    uint32_t offset = 0;

    if (!str) {
        memset (key_id, 0, 8);
        return;
    }

    if (!len)
        len = (uint32_t) strlen (str);

    // whole 8-byte chunks; "len - offset" cannot wrap since offset <= len
    for (; len - offset >= 8; offset += 8) {
        uint64_t chunk = 0;
        memcpy (&chunk, str + offset, sizeof (chunk));
        folded ^= chunk;
    }
    memcpy (key_id, &folded, 8);

    // remaining 0..7 tail bytes go into the leading digest bytes
    for (uint32_t i = 0; i < len - offset; ++i) {
        key_id[i] ^= (uint8_t) str[offset + i];
    }
}
| |
| XCamReturn |
| CLKernel::build_kernel (const XCamKernelInfo& info, const char* options) |
| { |
| KernelMap::iterator i_kernel; |
| SmartPtr<CLKernel> single_kernel; |
| char key_str[1024]; |
| uint8_t body_key[8]; |
| std::string key; |
| XCamReturn ret = XCAM_RETURN_NO_ERROR; |
| |
| XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null"); |
| |
| xcam_mem_clear (body_key); |
| get_string_key_id (info.kernel_body, info.kernel_body_len, body_key); |
| snprintf ( |
| key_str, sizeof(key_str), |
| "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s", |
| info.kernel_name, |
| body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7], |
| XCAM_STR(options)); |
| key = key_str; |
| |
| char temp_filename[XCAM_MAX_STR_SIZE] = {0}; |
| char cache_filename[XCAM_MAX_STR_SIZE] = {0}; |
| FileHandle temp_file; |
| FileHandle cache_file; |
| size_t read_cache_size = 0; |
| size_t write_cache_size = 0; |
| uint8_t *kernel_cache = NULL; |
| bool load_cache = false; |
| struct timeval ts; |
| |
| const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH"); |
| if (NULL == cache_path) { |
| cache_path = _kernel_cache_path; |
| } |
| |
| snprintf ( |
| cache_filename, XCAM_MAX_STR_SIZE - 1, |
| "%s/%s", |
| cache_path, key_str); |
| |
| { |
| SmartLock locker (_kernel_map_mutex); |
| |
| i_kernel = _kernel_map.find (key); |
| if (i_kernel == _kernel_map.end ()) { |
| SmartPtr<CLContext> context = get_context (); |
| single_kernel = new CLKernel (context, info.kernel_name); |
| XCAM_ASSERT (single_kernel.ptr ()); |
| |
| if (access (cache_path, F_OK) == -1) { |
| mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); |
| } |
| |
| ret = cache_file.open (cache_filename, "r"); |
| if (ret == XCAM_RETURN_NO_ERROR) { |
| cache_file.get_file_size (read_cache_size); |
| if (read_cache_size > 0) { |
| kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1)); |
| if (NULL != kernel_cache) { |
| cache_file.read_file (kernel_cache, read_cache_size); |
| cache_file.close (); |
| |
| ret = single_kernel->load_from_binary (kernel_cache, read_cache_size); |
| xcam_free (kernel_cache); |
| kernel_cache = NULL; |
| |
| XCAM_FAIL_RETURN ( |
| ERROR, ret == XCAM_RETURN_NO_ERROR, ret, |
| "build kernel(%s) from binary failed", key_str); |
| |
| load_cache = true; |
| } |
| } |
| } else { |
| XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret); |
| } |
| |
| if (load_cache == false) { |
| ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options); |
| XCAM_FAIL_RETURN ( |
| ERROR, ret == XCAM_RETURN_NO_ERROR, ret, |
| "build kernel(%s) from source failed", key_str); |
| } |
| |
| _kernel_map.insert (std::make_pair (key, single_kernel)); |
| //_kernel_map[key] = single_kernel; |
| } else { |
| single_kernel = i_kernel->second; |
| } |
| } |
| |
| if (load_cache == false && NULL != kernel_cache) { |
| gettimeofday (&ts, NULL); |
| snprintf ( |
| temp_filename, XCAM_MAX_STR_SIZE - 1, |
| "%s." XCAM_TIMESTAMP_FORMAT, |
| cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts))); |
| |
| ret = temp_file.open (temp_filename, "wb"); |
| if (ret == XCAM_RETURN_NO_ERROR) { |
| ret = temp_file.write_file (kernel_cache, write_cache_size); |
| temp_file.close (); |
| if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) { |
| rename (temp_filename, cache_filename); |
| } else { |
| remove (temp_filename); |
| } |
| } else { |
| XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret); |
| } |
| xcam_free (kernel_cache); |
| kernel_cache = NULL; |
| } |
| |
| XCAM_FAIL_RETURN ( |
| ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN, |
| "build kernel(%s) failed, unknown error", key_str); |
| |
| ret = this->clone (single_kernel); |
| XCAM_FAIL_RETURN ( |
| ERROR, ret == XCAM_RETURN_NO_ERROR, ret, |
| "load kernel(%s) from kernel failed", key_str); |
| return ret; |
| } |
| |
| XCamReturn |
| CLKernel::load_from_source ( |
| const char *source, size_t length, |
| uint8_t **gen_binary, size_t *binary_size, |
| const char *build_option) |
| { |
| cl_kernel new_kernel_id = NULL; |
| |
| XCAM_ASSERT (source); |
| if (!source) { |
| XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name)); |
| return XCAM_RETURN_ERROR_PARAM; |
| } |
| |
| if (_kernel_id) { |
| XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name)); |
| return XCAM_RETURN_ERROR_PARAM; |
| } |
| |
| XCAM_ASSERT (_context.ptr ()); |
| |
| if (length == 0) |
| length = strlen (source); |
| |
| new_kernel_id = |
| _context->generate_kernel_id ( |
| this, |
| (const uint8_t *)source, length, |
| CLContext::KERNEL_BUILD_SOURCE, |
| gen_binary, binary_size, |
| build_option); |
| XCAM_FAIL_RETURN( |
| WARNING, |
| new_kernel_id != NULL, |
| XCAM_RETURN_ERROR_CL, |
| "cl kernel(%s) load from source failed", XCAM_STR (_name)); |
| |
| _kernel_id = new_kernel_id; |
| return XCAM_RETURN_NO_ERROR; |
| } |
| |
| XCamReturn |
| CLKernel::load_from_binary (const uint8_t *binary, size_t length) |
| { |
| cl_kernel new_kernel_id = NULL; |
| |
| XCAM_ASSERT (binary); |
| if (!binary || !length) { |
| XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name)); |
| return XCAM_RETURN_ERROR_PARAM; |
| } |
| |
| if (_kernel_id) { |
| XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name)); |
| return XCAM_RETURN_ERROR_PARAM; |
| } |
| |
| XCAM_ASSERT (_context.ptr ()); |
| |
| new_kernel_id = |
| _context->generate_kernel_id ( |
| this, |
| binary, length, |
| CLContext::KERNEL_BUILD_BINARY, |
| NULL, NULL, |
| NULL); |
| XCAM_FAIL_RETURN( |
| WARNING, |
| new_kernel_id != NULL, |
| XCAM_RETURN_ERROR_CL, |
| "cl kernel(%s) load from binary failed", XCAM_STR (_name)); |
| |
| _kernel_id = new_kernel_id; |
| return XCAM_RETURN_NO_ERROR; |
| } |
| |
| XCamReturn |
| CLKernel::clone (SmartPtr<CLKernel> kernel) |
| { |
| XCAM_FAIL_RETURN ( |
| WARNING, |
| kernel.ptr () && kernel->is_valid (), |
| XCAM_RETURN_ERROR_CL, |
| "cl kernel(%s) load from kernel failed", XCAM_STR (_name)); |
| _kernel_id = kernel->get_kernel_id (); |
| _parent_kernel = kernel; |
| if (!_name && kernel->get_kernel_name ()) { |
| _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE); |
| } |
| return XCAM_RETURN_NO_ERROR; |
| } |
| |
| XCamReturn |
| CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size) |
| { |
| XCamReturn ret = XCAM_RETURN_NO_ERROR; |
| uint32_t i_count = 0; |
| |
| XCAM_FAIL_RETURN ( |
| ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM, |
| "cl image kernel(%s) arguments was already set, can NOT be set twice", get_kernel_name ()); |
| |
| for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) { |
| const SmartPtr<CLArgument> &arg = *iter; |
| XCAM_FAIL_RETURN ( |
| WARNING, arg.ptr (), |
| XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", get_kernel_name (), i_count); |
| |
| void *adress = NULL; |
| uint32_t size = 0; |
| arg->get_value (adress, size); |
| ret = set_argument (i_count, adress, size); |
| XCAM_FAIL_RETURN ( |
| WARNING, ret == XCAM_RETURN_NO_ERROR, |
| ret, "cl image kernel(%s) set argc(%d) failed", get_kernel_name (), i_count); |
| } |
| |
| ret = set_work_size (work_size); |
| XCAM_FAIL_RETURN ( |
| WARNING, ret == XCAM_RETURN_NO_ERROR, ret, |
| "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed", |
| XCAM_STR(get_kernel_name ()), |
| (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2], |
| (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]); |
| |
| _arg_list = args; |
| return ret; |
| } |
| |
| XCamReturn |
| CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size) |
| { |
| cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr); |
| if (error_code != CL_SUCCESS) { |
| XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", _name, arg_i); |
| return XCAM_RETURN_ERROR_CL; |
| } |
| return XCAM_RETURN_NO_ERROR; |
| } |
| |
| XCamReturn |
| CLKernel::set_work_size (const CLWorkSize &work_size) |
| { |
| uint32_t i = 0; |
| uint32_t work_group_size = 1; |
| const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info (); |
| |
| XCAM_FAIL_RETURN ( |
| WARNING, |
| work_size.dim <= dev_info.max_work_item_dims, |
| XCAM_RETURN_ERROR_PARAM, |
| "kernel(%s) work dims(%d) greater than device max dims(%d)", |
| _name, work_size.dim, dev_info.max_work_item_dims); |
| |
| for (i = 0; i < work_size.dim; ++i) { |
| work_group_size *= work_size.local [i]; |
| |
| XCAM_FAIL_RETURN ( |
| WARNING, |
| work_size.local [i] <= dev_info.max_work_item_sizes [i], |
| XCAM_RETURN_ERROR_PARAM, |
| "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)", |
| _name, i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]); |
| } |
| |
| XCAM_FAIL_RETURN ( |
| WARNING, |
| work_group_size == 0 || work_group_size <= dev_info.max_work_group_size, |
| XCAM_RETURN_ERROR_PARAM, |
| "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)", |
| _name, work_group_size, (uint32_t)dev_info.max_work_group_size); |
| |
| _work_size = work_size; |
| |
| return XCAM_RETURN_NO_ERROR; |
| } |
| |
| void |
| CLKernel::set_default_work_size () |
| { |
| _work_size.dim = XCAM_DEFAULT_IMAGE_DIM; |
| for (uint32_t i = 0; i < _work_size.dim; ++i) { |
| //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE; |
| _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE; |
| } |
| } |
| |
| struct KernelUserData { |
| SmartPtr<CLKernel> kernel; |
| SmartPtr<CLEvent> event; |
| CLArgList arg_list; |
| |
| KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e) |
| : kernel (k) |
| , event (e) |
| {} |
| }; |
| |
| void |
| CLKernel::event_notify (cl_event event, cl_int status, void* data) |
| { |
| KernelUserData *kernel_data = (KernelUserData *)data; |
| XCAM_ASSERT (event == kernel_data->event->get_event_id ()); |
| XCAM_UNUSED (status); |
| |
| delete kernel_data; |
| } |
| |
| XCamReturn |
| CLKernel::execute ( |
| const SmartPtr<CLKernel> self, |
| bool block, |
| CLEventList &events, |
| SmartPtr<CLEvent> &event_out) |
| { |
| XCAM_ASSERT (self.ptr () == this); |
| XCAM_ASSERT (_context.ptr ()); |
| SmartPtr<CLEvent> kernel_event = event_out; |
| |
| if (!block && !kernel_event.ptr ()) { |
| kernel_event = new CLEvent; |
| } |
| |
| #if ENABLE_DEBUG_KERNEL |
| XCAM_OBJ_PROFILING_START; |
| #endif |
| |
| XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event); |
| |
| XCAM_FAIL_RETURN ( |
| ERROR, |
| ret == XCAM_RETURN_NO_ERROR, |
| ret, |
| "kernel(%s) execute failed", XCAM_STR(_name)); |
| |
| |
| if (block) { |
| _context->finish (); |
| } else { |
| XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ()); |
| KernelUserData *user_data = new KernelUserData (self, kernel_event); |
| user_data->arg_list.swap (_arg_list); |
| ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data); |
| if (ret != XCAM_RETURN_NO_ERROR) { |
| XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name)); |
| _context->finish (); |
| delete user_data; |
| } |
| } |
| _arg_list.clear (); |
| |
| #if ENABLE_DEBUG_KERNEL |
| _context->finish (); |
| char name[1024]; |
| snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this); |
| XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM); |
| #endif |
| return ret; |
| } |
| |
| }; |