OpenCL - 内核方法返回意外结果

OpenCL - Kernel method returns unexpected results

本文关键字:意外 结果 返回 方法 内核 OpenCL      更新时间:2023-10-16

我是OpenCL的初学者。我尝试运行一个非常简单的内核代码,向量的每个值加 1。一切运行良好,没有返回错误代码(我在每一步后检查了返回值(。源代码:

cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj , resobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t work_units_per_kernels;
int input[10] =  {1,2,3,4,5,6,7,8,9,10};
int output[10];
int length = 10 ;

FILE *fp;
char fileName[] = "/home/tuan/OpenCLPlayaround/hello.cl";
char *source_str;
size_t source_size;
/* Load the source code containing the kernel*/
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.n");
exit(1);
}
source_str = (char*)malloc(0x100000);
source_size = fread(source_str,1,0x100000, fp);
fclose(fp);
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
std::cout<<ret<<" code"<<std::endl;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
std::cout<<ret<<" code"<<std::endl;
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
std::cout<<ret<<" code"<<std::endl;
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
//Check Concept of memory
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,length * sizeof(int), input, &ret);

resobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length * sizeof(int), output, &ret);
std::cout<<ret<<" code"<<std::endl;

program = clCreateProgramWithSource(context,1,(const char**)&source_str, (const size_t*)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
kernel = clCreateKernel(program, "hello", &ret);
ret = clSetKernelArg(kernel,0, sizeof(memobj),(void *)&memobj);
ret = clSetKernelArg(kernel,1, sizeof(resobj),(void *)&resobj);
ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);
ret = clEnqueueReadBuffer(command_queue, resobj, CL_TRUE, 0, length* sizeof(int),output, 0, NULL, NULL);
for (int i = 0 ; i <10 ; i++) {
    std::cout<<output[i]<<" "<<std::endl;
}
return 0;

结果有些奇怪,而它应该是{2,3,4,5,6,7,8,9,10,11}

2 
-16777216 
65535 
1 
-1242789408 
32767 
4201449 
0 
2 
0 

和我的内核:

__kernel void hello(__global int* a, __global int* b)
{
    int sam = 0;
    int gid = get_global_id(0);
    b[gid] = sam + a[gid] +1 ;
}

有人可以解释为什么吗?它让我头破了几个小时!

clEnqueueTask相当于

调用clEnqueueNDRangeKernelwork_dim = 1global_work_offset = NULLglobal_work_size[0]设置为1local_work_size[0]设置为1

所以使用clEnqueueNDRangeKernel.