// blob: 62dfcb78436e670f54a33b124f943dd4840e99bd [file] [log] [blame]
/*
* cl_kernel.cpp - CL kernel
*
* Copyright (c) 2015 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: Wind Yuan <[email protected]>
*/
#include "cl_kernel.h"
#include "cl_context.h"
#include "cl_device.h"
#include "file_handle.h"
#include <sys/stat.h>
// Set to a non-zero value to compile in the profiling sections of CLKernel::execute ()
#define ENABLE_DEBUG_KERNEL 0
// Value stored into every local work-size dimension by set_default_work_size ()
#define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0
namespace XCam {
// Kernels that were already built, looked up by the key string composed in build_kernel ()
CLKernel::KernelMap CLKernel::_kernel_map;
// Held by build_kernel () while it searches or fills _kernel_map
Mutex CLKernel::_kernel_map_mutex;
/*
 * Compose the default kernel cache directory, "<HOME>/.xcam/".
 *
 * The string is written into a function-local static buffer, so the returned
 * pointer stays usable after the call; every call rewrites that buffer.
 * When HOME is not set, "." is used as the base directory: std::getenv ()
 * returns a null pointer in that case and a null pointer must not be handed
 * to a "%s" conversion.
 */
static char*
default_cache_path () {
    static char path[XCAM_MAX_STR_SIZE] = {0};
    const char *home = std::getenv ("HOME");

    if (!home)
        home = ".";

    snprintf (
        path, XCAM_MAX_STR_SIZE - 1,
        "%s/%s", home, ".xcam/");
    return path;
}
// Cache directory used by build_kernel () when XCAM_CL_KERNEL_CACHE_PATH is not set in the environment
const char* CLKernel::_kernel_cache_path = default_cache_path ();
/*
 * Create a kernel object that is attached to a CL context but not built yet
 * (the kernel id starts out as NULL).
 *
 * context  CL context the kernel belongs to, asserted to be non-null
 * name     kernel name, may be NULL; when given, a private copy limited to
 *          XCAM_MAX_STR_SIZE characters is stored
 */
CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name)
    : _name (NULL)
    , _kernel_id (NULL)
    , _context (context)
{
    XCAM_ASSERT (context.ptr ());

    // A missing name is tolerated, _name simply stays NULL in that case
    if (name != NULL) {
        _name = strndup (name, XCAM_MAX_STR_SIZE);
    }

    set_default_work_size ();
    XCAM_OBJ_PROFILING_INIT;
}
/*
 * Run destroy () for the kernel id, then free the name copy made with strndup ().
 */
CLKernel::~CLKernel ()
{
    destroy ();

    if (_name != NULL) {
        xcam_free (_name);
    }
}
/*
 * Hand the kernel id to the context for destruction, unless this object was
 * cloned from a parent kernel.
 */
void
CLKernel::destroy ()
{
    // A clone took its kernel id from _parent_kernel, so it must not destroy it
    if (_parent_kernel.ptr ()) {
        return;
    }

    _context->destroy_kernel_id (_kernel_id);
}
/*
 * Fold a string into an 8-byte key: input byte i is XOR-ed into key_id[i % 8].
 * key_id is overwritten; its previous content does not take part in the key.
 *
 * str     input bytes; a NULL pointer yields an all-zero key
 * len     number of bytes to fold, 0 means "use strlen (str)"
 * key_id  receives the 8 key bytes
 *
 * The fold works byte by byte instead of reading the input through a
 * uint32_t pointer. The input is an arbitrary char buffer, so word-sized
 * loads from it may be misaligned and break the aliasing rules. The key is
 * unchanged, because XOR-ing 32-bit words and copying them out byte-wise is
 * the same as XOR-ing the bytes position by position.
 */
static void
get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
{
    memset (key_id, 0, 8);

    if (!str)
        return;

    if (!len)
        len = (uint32_t) strlen (str);

    for (uint32_t i = 0; i < len; ++i) {
        key_id[i & 7] ^= (uint8_t) str[i];
    }
}
/*
 * Build the kernel described by info and bind this object to it.
 *
 * Built kernels are shared through _kernel_map. The map key is
 * "<kernel name>#<8-byte body hash in hex>#<options>". On a map miss a new
 * kernel is created and loaded from a binary cache file named after that key
 * (directory: $XCAM_CL_KERNEL_CACHE_PATH, else _kernel_cache_path). When no
 * usable cache file exists, or the cached binary fails to load, the kernel
 * is built from source and the generated binary is written back to the
 * cache. Finally this object is cloned from the shared kernel.
 *
 * info     kernel description; kernel_name and kernel_body must be non-NULL
 * options  build options forwarded to the source build; a NULL pointer is
 *          rendered through XCAM_STR in the key
 * returns  XCAM_RETURN_NO_ERROR on success, XCAM_RETURN_ERROR_PARAM when the
 *          name or body is missing, otherwise the error of the failing step
 */
XCamReturn
CLKernel::build_kernel (const XCamKernelInfo& info, const char* options)
{
    KernelMap::iterator i_kernel;
    SmartPtr<CLKernel> single_kernel;
    char key_str[1024];
    uint8_t body_key[8];
    std::string key;
    XCamReturn ret = XCAM_RETURN_NO_ERROR;

    XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null");
    // The body is hashed and measured with strlen () below, so it must not be NULL
    XCAM_FAIL_RETURN (
        ERROR, info.kernel_body, XCAM_RETURN_ERROR_PARAM,
        "build kernel(%s) failed since kernel body null", info.kernel_name);

    xcam_mem_clear (body_key);
    get_string_key_id (info.kernel_body, info.kernel_body_len, body_key);
    snprintf (
        key_str, sizeof(key_str),
        "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s",
        info.kernel_name,
        body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7],
        XCAM_STR(options));
    key = key_str;

    char temp_filename[XCAM_MAX_STR_SIZE] = {0};
    char cache_filename[XCAM_MAX_STR_SIZE] = {0};
    FileHandle temp_file;
    FileHandle cache_file;
    size_t read_cache_size = 0;
    size_t write_cache_size = 0;
    uint8_t *kernel_cache = NULL;
    bool load_cache = false;
    struct timeval ts;

    // The environment variable overrides the default cache directory
    const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH");
    if (NULL == cache_path) {
        cache_path = _kernel_cache_path;
    }

    snprintf (
        cache_filename, XCAM_MAX_STR_SIZE - 1,
        "%s/%s",
        cache_path, key_str);

    {
        SmartLock locker (_kernel_map_mutex);

        i_kernel = _kernel_map.find (key);
        if (i_kernel == _kernel_map.end ()) {
            SmartPtr<CLContext> context = get_context ();
            single_kernel = new CLKernel (context, info.kernel_name);
            XCAM_ASSERT (single_kernel.ptr ());

            if (access (cache_path, F_OK) == -1) {
                mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
            }

            ret = cache_file.open (cache_filename, "r");
            if (ret == XCAM_RETURN_NO_ERROR) {
                cache_file.get_file_size (read_cache_size);
                if (read_cache_size > 0) {
                    kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1));
                    if (NULL != kernel_cache) {
                        cache_file.read_file (kernel_cache, read_cache_size);
                        cache_file.close ();

                        ret = single_kernel->load_from_binary (kernel_cache, read_cache_size);
                        xcam_free (kernel_cache);
                        kernel_cache = NULL;

                        if (ret == XCAM_RETURN_NO_ERROR) {
                            load_cache = true;
                        } else {
                            // A cached binary that no longer loads must not block the build.
                            // load_from_binary () only stores a kernel id on success, so the
                            // kernel can still be built from source; the rebuilt binary then
                            // replaces the cache file below.
                            XCAM_LOG_WARNING (
                                "build kernel(%s) from binary failed ret(%d), rebuild from source",
                                key_str, ret);
                        }
                    }
                }
            } else {
                XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret);
            }

            if (load_cache == false) {
                ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options);
                XCAM_FAIL_RETURN (
                    ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
                    "build kernel(%s) from source failed", key_str);
            }

            _kernel_map.insert (std::make_pair (key, single_kernel));
            //_kernel_map[key] = single_kernel;
        } else {
            single_kernel = i_kernel->second;
        }
    }

    // Write the freshly generated binary to the cache, outside of the map lock.
    // It goes to a timestamped temp file first and is renamed into place afterwards.
    if (load_cache == false && NULL != kernel_cache) {
        gettimeofday (&ts, NULL);
        snprintf (
            temp_filename, XCAM_MAX_STR_SIZE - 1,
            "%s." XCAM_TIMESTAMP_FORMAT,
            cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts)));

        ret = temp_file.open (temp_filename, "wb");
        if (ret == XCAM_RETURN_NO_ERROR) {
            ret = temp_file.write_file (kernel_cache, write_cache_size);
            temp_file.close ();
            if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) {
                rename (temp_filename, cache_filename);
            } else {
                remove (temp_filename);
            }
        } else {
            XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret);
        }

        xcam_free (kernel_cache);
        kernel_cache = NULL;
    }

    XCAM_FAIL_RETURN (
        ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN,
        "build kernel(%s) failed, unknown error", key_str);

    ret = this->clone (single_kernel);
    XCAM_FAIL_RETURN (
        ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
        "load kernel(%s) from kernel failed", key_str);
    return ret;
}
/*
 * Build this kernel from source text through the context.
 *
 * source        kernel source, must not be NULL
 * length        source length in bytes, 0 means "use strlen (source)"
 * gen_binary    forwarded to CLContext::generate_kernel_id ()
 * binary_size   forwarded to CLContext::generate_kernel_id ()
 * build_option  forwarded to CLContext::generate_kernel_id ()
 * returns       XCAM_RETURN_NO_ERROR on success, XCAM_RETURN_ERROR_PARAM for
 *               a NULL source or an already built kernel,
 *               XCAM_RETURN_ERROR_CL when no kernel id was generated
 */
XCamReturn
CLKernel::load_from_source (
    const char *source, size_t length,
    uint8_t **gen_binary, size_t *binary_size,
    const char *build_option)
{
    XCAM_ASSERT (source);
    if (source == NULL) {
        XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    // A kernel object may be built only once
    if (_kernel_id != NULL) {
        XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    XCAM_ASSERT (_context.ptr ());

    const size_t source_len = (length != 0) ? length : strlen (source);

    cl_kernel built_id =
        _context->generate_kernel_id (
            this,
            (const uint8_t *)source, source_len,
            CLContext::KERNEL_BUILD_SOURCE,
            gen_binary, binary_size,
            build_option);
    XCAM_FAIL_RETURN(
        WARNING,
        built_id != NULL,
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from source failed", XCAM_STR (_name));

    // Store the id only after the build succeeded
    _kernel_id = built_id;
    return XCAM_RETURN_NO_ERROR;
}
/*
 * Build this kernel from a binary blob through the context.
 *
 * binary   blob to load, must not be NULL
 * length   blob size in bytes, must not be 0
 * returns  XCAM_RETURN_NO_ERROR on success, XCAM_RETURN_ERROR_PARAM for an
 *          empty blob or an already built kernel, XCAM_RETURN_ERROR_CL when
 *          no kernel id was generated
 */
XCamReturn
CLKernel::load_from_binary (const uint8_t *binary, size_t length)
{
    XCAM_ASSERT (binary);
    if (binary == NULL || length == 0) {
        XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    // A kernel object may be built only once
    if (_kernel_id != NULL) {
        XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    XCAM_ASSERT (_context.ptr ());

    // No generated binary and no build options are requested for a binary load
    cl_kernel built_id =
        _context->generate_kernel_id (
            this,
            binary, length,
            CLContext::KERNEL_BUILD_BINARY,
            NULL, NULL,
            NULL);
    XCAM_FAIL_RETURN(
        WARNING,
        built_id != NULL,
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from binary failed", XCAM_STR (_name));

    // Store the id only after the load succeeded
    _kernel_id = built_id;
    return XCAM_RETURN_NO_ERROR;
}
/*
 * Make this object use the kernel id of an already built kernel.
 *
 * The source kernel is stored as _parent_kernel. When this object has no
 * name yet, the name of the source kernel is copied.
 *
 * kernel   kernel to clone from; must be non-null and valid
 * returns  XCAM_RETURN_NO_ERROR, or XCAM_RETURN_ERROR_CL for a null or
 *          invalid source kernel
 */
XCamReturn
CLKernel::clone (SmartPtr<CLKernel> kernel)
{
    XCAM_FAIL_RETURN (
        WARNING,
        kernel.ptr () && kernel->is_valid (),
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from kernel failed", XCAM_STR (_name));

    _kernel_id = kernel->get_kernel_id ();
    _parent_kernel = kernel;

    // Take over the parent's name only when none was given at construction
    if (_name == NULL) {
        const char *parent_name = kernel->get_kernel_name ();
        if (parent_name != NULL) {
            _name = strndup (parent_name, XCAM_MAX_STR_SIZE);
        }
    }

    return XCAM_RETURN_NO_ERROR;
}
/*
 * Set all kernel arguments and the work size in one call.
 *
 * Each entry of args is set at the position it has in the list. The list is
 * stored in _arg_list only after every argument and the work size were
 * accepted.
 *
 * args       argument list, every entry must be non-null
 * work_size  work size checked and stored through set_work_size ()
 * returns    XCAM_RETURN_NO_ERROR on success, XCAM_RETURN_ERROR_PARAM when
 *            arguments are already set or an entry is null, otherwise the
 *            error of set_argument () / set_work_size ()
 *
 * The kernel name may be NULL, so every log message passes it through XCAM_STR.
 */
XCamReturn
CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size)
{
    XCamReturn ret = XCAM_RETURN_NO_ERROR;
    uint32_t i_count = 0;

    XCAM_FAIL_RETURN (
        ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM,
        "cl image kernel(%s) arguments was already set, can NOT be set twice", XCAM_STR (get_kernel_name ()));

    for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) {
        const SmartPtr<CLArgument> &arg = *iter;
        XCAM_FAIL_RETURN (
            WARNING, arg.ptr (),
            XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", XCAM_STR (get_kernel_name ()), i_count);

        void *address = NULL;
        uint32_t size = 0;
        arg->get_value (address, size);
        ret = set_argument (i_count, address, size);
        XCAM_FAIL_RETURN (
            WARNING, ret == XCAM_RETURN_NO_ERROR,
            ret, "cl image kernel(%s) set argc(%d) failed", XCAM_STR (get_kernel_name ()), i_count);
    }

    ret = set_work_size (work_size);
    XCAM_FAIL_RETURN (
        WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
        "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed",
        XCAM_STR(get_kernel_name ()),
        (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2],
        (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]);

    _arg_list = args;
    return ret;
}
/*
 * Set one kernel argument with clSetKernelArg ().
 *
 * arg_i     argument index
 * arg_addr  address of the argument value
 * arg_size  size of the argument value in bytes
 * returns   XCAM_RETURN_NO_ERROR, or XCAM_RETURN_ERROR_CL when
 *           clSetKernelArg () does not report CL_SUCCESS
 */
XCamReturn
CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size)
{
    cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr);
    if (error_code != CL_SUCCESS) {
        // _name may be NULL, XCAM_STR keeps the "%s" conversion well defined
        XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", XCAM_STR (_name), arg_i);
        return XCAM_RETURN_ERROR_CL;
    }
    return XCAM_RETURN_NO_ERROR;
}
/*
 * Check a work size against the device limits and store it in _work_size.
 *
 * The number of dimensions, every local size and the product of the local
 * sizes are compared with the values reported by the CLDevice instance. A
 * local-size product of 0 passes the work-group-size check.
 *
 * work_size  work size to check and store
 * returns    XCAM_RETURN_NO_ERROR, or XCAM_RETURN_ERROR_PARAM when a limit
 *            is exceeded (in which case _work_size is left unchanged)
 *
 * _name may be NULL, so every log message passes it through XCAM_STR.
 */
XCamReturn
CLKernel::set_work_size (const CLWorkSize &work_size)
{
    uint32_t i = 0;
    uint32_t work_group_size = 1;
    const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info ();

    XCAM_FAIL_RETURN (
        WARNING,
        work_size.dim <= dev_info.max_work_item_dims,
        XCAM_RETURN_ERROR_PARAM,
        "kernel(%s) work dims(%d) greater than device max dims(%d)",
        XCAM_STR (_name), work_size.dim, dev_info.max_work_item_dims);

    for (i = 0; i < work_size.dim; ++i) {
        work_group_size *= work_size.local [i];

        XCAM_FAIL_RETURN (
            WARNING,
            work_size.local [i] <= dev_info.max_work_item_sizes [i],
            XCAM_RETURN_ERROR_PARAM,
            "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)",
            XCAM_STR (_name), i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]);
    }

    XCAM_FAIL_RETURN (
        WARNING,
        work_group_size == 0 || work_group_size <= dev_info.max_work_group_size,
        XCAM_RETURN_ERROR_PARAM,
        "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)",
        XCAM_STR (_name), work_group_size, (uint32_t)dev_info.max_work_group_size);

    _work_size = work_size;
    return XCAM_RETURN_NO_ERROR;
}
/*
 * Reset _work_size to XCAM_DEFAULT_IMAGE_DIM dimensions and give each of them
 * the default local work size. Global sizes are not touched here.
 */
void
CLKernel::set_default_work_size ()
{
    _work_size.dim = XCAM_DEFAULT_IMAGE_DIM;

    uint32_t axis = 0;
    while (axis < _work_size.dim) {
        _work_size.local [axis] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE;
        ++axis;
    }
}
struct KernelUserData {
SmartPtr<CLKernel> kernel;
SmartPtr<CLEvent> event;
CLArgList arg_list;
KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e)
: kernel (k)
, event (e)
{}
};
/*
 * Event callback registered by execute (): deletes the KernelUserData record
 * passed as data, which drops the references it holds.
 *
 * event   event the callback fired for, asserted to match the record's event
 * status  not used
 * data    KernelUserData record allocated by execute ()
 */
void
CLKernel::event_notify (cl_event event, cl_int status, void* data)
{
    XCAM_UNUSED (status);

    KernelUserData *record = static_cast<KernelUserData *> (data);
    XCAM_ASSERT (event == record->event->get_event_id ());

    delete record;
}
/*
 * Submit this kernel for execution through the context.
 *
 * self       smart pointer to this very object (asserted)
 * block      true: wait with CLContext::finish () before returning;
 *            false: register event_notify () on the kernel event instead
 * events     event list handed to CLContext::execute_kernel ()
 * event_out  event to use for this execution; when it is empty and the call
 *            is non-blocking, a local CLEvent is created (event_out itself
 *            is not modified)
 * returns    result of execute_kernel (), or of set_event_callback () in the
 *            non-blocking case
 *
 * _arg_list is empty when the function returns successfully: in the
 * non-blocking case it is swapped into the callback record so the arguments
 * stay referenced until the event fires.
 */
XCamReturn
CLKernel::execute (
    const SmartPtr<CLKernel> self,
    bool block,
    CLEventList &events,
    SmartPtr<CLEvent> &event_out)
{
    XCAM_ASSERT (self.ptr () == this);
    XCAM_ASSERT (_context.ptr ());

    const bool async = !block;
    SmartPtr<CLEvent> done_event = event_out;
    if (async && !done_event.ptr ()) {
        done_event = new CLEvent;
    }

#if ENABLE_DEBUG_KERNEL
    XCAM_OBJ_PROFILING_START;
#endif

    XCamReturn ret = _context->execute_kernel (self, NULL, events, done_event);
    XCAM_FAIL_RETURN (
        ERROR,
        ret == XCAM_RETURN_NO_ERROR,
        ret,
        "kernel(%s) execute failed", XCAM_STR(_name));

    if (async) {
        XCAM_ASSERT (done_event.ptr () && done_event->get_event_id ());

        // The record owns the arguments until event_notify () deletes it
        KernelUserData *record = new KernelUserData (self, done_event);
        record->arg_list.swap (_arg_list);

        ret = _context->set_event_callback (done_event, CL_COMPLETE, event_notify, record);
        if (ret != XCAM_RETURN_NO_ERROR) {
            XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name));
            // Without a callback nobody would free the record: wait, then free it here
            _context->finish ();
            delete record;
        }
    } else {
        _context->finish ();
    }

    _arg_list.clear ();

#if ENABLE_DEBUG_KERNEL
    _context->finish ();
    char name[1024];
    snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this);
    XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM);
#endif
    return ret;
}
};