OpenCL error 48 when launching kernel -
Continuing my OpenCL adventure, this is what I have so far of my port of the CUDA implementation. I was trying to check whether at least the first kernel call was working, but I got error 48 and have no idea what I am missing. I was following the example on this page.
kernel
/*
 * Gives every pixel of a c_rows x c_cols image its initial group number.
 *
 * Work-item dimension 0 is used as the column and dimension 1 as the row.
 * Work-items that fall outside the image do nothing, so the global size may
 * be larger than the image.
 *
 * pixelgroup   output, one int per pixel, indexed row * c_cols + column
 * c_rows       image height in pixels
 * c_cols       image width in pixels
 * c_numcolors  divisor applied to the linear pixel index
 *
 * NOTE(review): the stored value is index / c_numcolors, which reaches
 * c_numcolors or more as soon as the image holds more than
 * c_numcolors * c_numcolors pixels -- confirm that this is the intended
 * "even distribution" over the colour table.
 */
__kernel void clut_distributepixels(__global int *pixelgroup, int c_rows, int c_cols, int c_numcolors)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col < c_cols && row < c_rows) {
        const int pixel = row * c_cols + col;
        pixelgroup[pixel] = pixel / c_numcolors;
    }
}
read kernel file
/**
 * Reads a whole file into a freshly malloc'ed, NUL-terminated buffer.
 *
 * @param filename  path of the file to read
 * @param length    out: number of bytes actually read (terminator excluded);
 *                  left untouched when the function fails
 * @return the buffer, which the caller releases with free(), or NULL when the
 *         arguments are NULL, the file cannot be opened or measured, it is too
 *         large for an int length, or memory cannot be allocated
 */
char *file_contents(const char *filename, int *length){
    if (filename == NULL || length == NULL) {
        return NULL;
    }

    FILE *f = fopen(filename, "r");
    if (!f) {
        fprintf(stderr, "unable open %s reading\n", filename);
        return NULL;
    }

    /* Measure the file; ftell reports -1 on failure, which must never reach malloc/fread. */
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        size = ftell(f);
    }
    /* The round trip through int rejects sizes the int out-parameter cannot represent. */
    const bool fits_in_int = size >= 0 && static_cast<long>(static_cast<int>(size)) == size;
    if (!fits_in_int || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return NULL;
    }

    char *buffer = static_cast<char *>(malloc(static_cast<size_t>(size) + 1));
    if (buffer == NULL) {
        fclose(f);
        return NULL;
    }

    /* fread may deliver fewer bytes than measured (e.g. text-mode newline translation). */
    const size_t got = fread(buffer, 1, static_cast<size_t>(size), f);
    fclose(f);

    buffer[got] = '\0';
    *length = static_cast<int>(got);
    return buffer;
}
code
#include <iostream> #include <opencl/opencl.h> #include "utilities.hpp" int main(int argc, const char * argv[]){ if (argc < 3) { std::cout << "use: {gpu|cpu} ncolors" << std::endl; return 1; } /************************************************ host side initialization ************************************************/ int h_numcolors = atoi(argv[2]); color *h_image; int h_rows, h_cols; if (readtext2rgb("lenaoriginal.txt", &h_image, &h_rows, &h_cols) != success){ return 1; } int *h_pixelgroup = new int[h_rows*h_cols]; color *h_grouprep = new color[h_numcolors]; color *h_clutimage = new color[h_rows*h_cols]; int h_change = 0; /************************************************ platform , device setup ************************************************/ cl_int errorstatus; //use first platform cl_platform_id platform; errorstatus = clgetplatformids(1, &platform, null); //use first device matches type selected cl_device_id device; if (strcmp(argv[1], "cpu")){ errorstatus = clgetdeviceids(platform, cl_device_type_cpu, 1, &device, null); }else if (strcmp(argv[1], "gpu")){ errorstatus = clgetdeviceids(platform, cl_device_type_gpu, 1, &device, null); }else{ std::cout << "unknown device type. 
choose either cpu or gpu" << std::endl; return 1; } //define context properties , create context cl_context_properties contextprops[3] = {cl_context_platform, (cl_context_properties)platform, 0}; cl_context context = clcreatecontext(contextprops, 1, &device, null, null, &errorstatus); //create command queue cl_command_queue queue = clcreatecommandqueue(context, device, 0, &errorstatus); /************************************************ device variable setup ************************************************/ cl_mem d_image; cl_mem d_pixelgroup; cl_mem d_grouprep; cl_mem d_clutimage; cl_mem d_change; d_image = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(color)*h_rows*h_cols, h_image, &errorstatus); d_pixelgroup = clcreatebuffer(context, cl_mem_read_write, sizeof(int)*h_rows*h_cols, null, &errorstatus); d_grouprep = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_numcolors, null, &errorstatus); d_clutimage = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_rows*h_cols, null, &errorstatus); d_change = clcreatebuffer(context, cl_mem_write_only, sizeof(int), null, &errorstatus); /************************************************ create, compile program , create kernel ************************************************/ int pl; size_t sourcelength; char * sourcecode = file_contents("vectorquantization.cl", &pl); sourcelength = (size_t)pl; cl_program program = clcreateprogramwithsource(context, 1, (const char**)&sourcecode, &sourcelength, &errorstatus); errorstatus = clbuildprogram(program, 0, null, null, null, null); cl_kernel k_clut_distributepixels = clcreatekernel(program, "clut_distributepixels", &errorstatus); errorstatus = clsetkernelarg(k_clut_distributepixels, 0, sizeof(cl_mem), (void*)&d_pixelgroup); errorstatus = clsetkernelarg(k_clut_distributepixels, 1, sizeof(cl_mem), (void*)&h_rows); errorstatus = clsetkernelarg(k_clut_distributepixels, 2, sizeof(cl_mem), (void*)&h_cols); errorstatus = 
clsetkernelarg(k_clut_distributepixels, 3, sizeof(cl_mem), (void*)&h_numcolors); cl_kernel k_clut_checkdistances = clcreatekernel(program, "clut_checkdistances", &errorstatus); errorstatus = clsetkernelarg(k_clut_checkdistances, 0, sizeof(cl_mem), (void*)&d_image); errorstatus = clsetkernelarg(k_clut_checkdistances, 1, sizeof(cl_mem), (void*)&d_pixelgroup); errorstatus = clsetkernelarg(k_clut_checkdistances, 2, sizeof(cl_mem), (void*)&d_grouprep); errorstatus = clsetkernelarg(k_clut_checkdistances, 3, sizeof(cl_mem), (void*)&h_rows); errorstatus = clsetkernelarg(k_clut_checkdistances, 4, sizeof(cl_mem), (void*)&h_cols); errorstatus = clsetkernelarg(k_clut_checkdistances, 5, sizeof(cl_mem), (void*)&h_numcolors); errorstatus = clsetkernelarg(k_clut_checkdistances, 6, sizeof(cl_mem), (void*)&d_change); cl_kernel k_clut_createimage = clcreatekernel(program, "clut_createimage", &errorstatus); errorstatus = clsetkernelarg(k_clut_createimage, 0, sizeof(cl_mem), (void*)&d_clutimage); errorstatus = clsetkernelarg(k_clut_createimage, 1, sizeof(cl_mem), (void*)&d_pixelgroup); errorstatus = clsetkernelarg(k_clut_createimage, 2, sizeof(cl_mem), (void*)&d_grouprep); errorstatus = clsetkernelarg(k_clut_createimage, 3, sizeof(cl_mem), (void*)&h_rows); errorstatus = clsetkernelarg(k_clut_createimage, 4, sizeof(cl_mem), (void*)&h_cols); /************************************************ execute program , results ************************************************/ /*step 1: evenly distribute pixels among colors in clut */ size_t grid[2] = {static_cast<size_t>(h_rows), static_cast<size_t>(h_cols)}; errorstatus = clenqueuendrangekernel(queue, k_clut_distributepixels, 2, null, grid, null, 0, null, null); clfinish(queue); /*********/ /* error */ /*********/ errorstatus = clenqueuereadbuffer(queue, d_pixelgroup, cl_true, 0, sizeof(int)*h_rows*h_cols, h_pixelgroup, 0, null, null); std::cout << h_pixelgroup[7] << ", " << h_pixelgroup[8] << ", " << h_pixelgroup[9] << ", " << h_pixelgroup[10] << 
std::endl; //do { /*step 2: compute reprenstative */ /*step 3: compute distances , reassign pixel group */ //copyfromconstantmemory //} while (h_change != 0); std::cout << "done !!" << std::endl; return 0; }
I found the error. First of all, always check return values when learning new stuff. I remembered that when I was learning CUDA I used a simple macro for this, so I started checking everything:
/*
 * Checks an OpenCL status code inside a function that itself returns a status.
 *
 * `code` is evaluated exactly once. In debug builds a failure trips the
 * assert; when asserts are compiled out, the enclosing function returns the
 * failing status instead.
 *
 * The body is wrapped in do { ... } while (0) with no trailing semicolon, so
 * `cl_success_or_return(x);` is a single statement and is safe inside an
 * unbraced if/else.
 */
#define cl_success_or_return(code)                 \
    do {                                           \
        const cl_int cl_status_ = (code);          \
        assert(cl_status_ == CL_SUCCESS);          \
        if (cl_status_ != CL_SUCCESS) {            \
            return cl_status_;                     \
        }                                          \
    } while (0)
And the error was at the beginning, where I check whether to use the CPU or the GPU: I forgot that strcmp returns 0 when the strings are equal. After fixing this, it all worked beautifully!
Anyway, if you have any other suggestions or advice, or you see something ugly or not best practice in my code, please comment.
Comments
Post a Comment