OpenCL error 48 when launching kernel -


Continuing my OpenCL adventure: this is what I have so far, based on my CUDA implementation. I was trying to check whether at least the first kernel call works, but I got error 48 and have no idea what I am missing. I was following the example on this page.

kernel

/*
 * Seeds the per-pixel group table.
 *
 * Each work-item handles one pixel: dimension 0 of the NDRange is read as the
 * column, dimension 1 as the row. The pixel's row-major linear index is
 * divided (integer division) by c_numcolors and the quotient is stored as its
 * group.
 *
 *   pixelgroup  - output, one int per pixel, indexed as row * c_cols + column
 *   c_rows      - number of rows
 *   c_cols      - number of columns
 *   c_numcolors - divisor applied to the linear pixel index
 */
__kernel void clut_distributepixels(__global int *pixelgroup, int c_rows, int c_cols, int c_numcolors)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    /* Work-items that fall outside the image write nothing. */
    if (col < c_cols && row < c_rows) {
        const int linear = row * c_cols + col;
        pixelgroup[linear] = linear / c_numcolors;
    }
}

read kernel file

/*
 * Reads a whole file into a freshly malloc'ed, NUL-terminated buffer.
 *
 *   filename - path of the file to read
 *   length   - out: number of bytes actually read (terminator not counted);
 *              written only when the call succeeds
 *
 * Returns the buffer (the caller releases it with free()), or NULL when the
 * file cannot be opened, its size cannot be determined, or memory runs out.
 * A diagnostic is printed on stderr for each failure.
 */
char *file_contents(const char *filename, int *length)
{
    FILE *f = fopen(filename, "r");
    if (!f) {
        fprintf(stderr, "unable open %s reading\n", filename);
        return NULL;
    }

    /* Measure the file by seeking to its end; ftell() reports -1 on failure,
       which must never reach malloc()/fread() as a size. */
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        size = ftell(f);
    }
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "unable to determine size of %s\n", filename);
        fclose(f);
        return NULL;
    }

    /* One extra byte for the terminating NUL. */
    char *buffer = static_cast<char *>(malloc(static_cast<size_t>(size) + 1));
    if (!buffer) {
        fprintf(stderr, "out of memory reading %s\n", filename);
        fclose(f);
        return NULL;
    }

    /* fread() may return fewer bytes than requested; terminate and report
       exactly what was read. */
    const size_t got = fread(buffer, 1, static_cast<size_t>(size), f);
    fclose(f);

    buffer[got] = '\0';
    *length = static_cast<int>(got);
    return buffer;
}

code

#include <iostream> #include <opencl/opencl.h>  #include "utilities.hpp"  int main(int argc, const char * argv[]){      if (argc < 3) {         std::cout << "use: {gpu|cpu} ncolors" << std::endl;         return 1;     }      /************************************************             host side initialization      ************************************************/     int h_numcolors = atoi(argv[2]);      color *h_image;     int h_rows, h_cols;     if (readtext2rgb("lenaoriginal.txt", &h_image, &h_rows, &h_cols) != success){         return 1;     }      int *h_pixelgroup = new int[h_rows*h_cols];     color *h_grouprep = new color[h_numcolors];     color *h_clutimage = new color[h_rows*h_cols];     int h_change = 0;      /************************************************                 platform , device setup     ************************************************/      cl_int errorstatus;      //use first platform     cl_platform_id platform;     errorstatus = clgetplatformids(1, &platform, null);      //use first device matches type selected     cl_device_id device;     if (strcmp(argv[1], "cpu")){         errorstatus = clgetdeviceids(platform, cl_device_type_cpu, 1, &device, null);     }else if (strcmp(argv[1], "gpu")){         errorstatus = clgetdeviceids(platform, cl_device_type_gpu, 1, &device, null);     }else{         std::cout << "unknown device type. 
choose either cpu or gpu" << std::endl;         return 1;     }      //define context properties , create context     cl_context_properties contextprops[3] = {cl_context_platform, (cl_context_properties)platform, 0};     cl_context context = clcreatecontext(contextprops, 1, &device, null, null, &errorstatus);      //create command queue     cl_command_queue queue = clcreatecommandqueue(context, device, 0, &errorstatus);      /************************************************                 device variable setup      ************************************************/      cl_mem d_image;     cl_mem d_pixelgroup;     cl_mem d_grouprep;     cl_mem d_clutimage;     cl_mem d_change;      d_image = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(color)*h_rows*h_cols, h_image, &errorstatus);     d_pixelgroup = clcreatebuffer(context, cl_mem_read_write, sizeof(int)*h_rows*h_cols, null, &errorstatus);     d_grouprep = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_numcolors, null, &errorstatus);     d_clutimage = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_rows*h_cols, null, &errorstatus);     d_change = clcreatebuffer(context, cl_mem_write_only, sizeof(int), null, &errorstatus);      /************************************************         create, compile program , create kernel      ************************************************/      int pl;     size_t sourcelength;     char * sourcecode = file_contents("vectorquantization.cl", &pl);     sourcelength = (size_t)pl;      cl_program program = clcreateprogramwithsource(context, 1, (const char**)&sourcecode, &sourcelength, &errorstatus);      errorstatus = clbuildprogram(program, 0, null, null, null, null);      cl_kernel k_clut_distributepixels = clcreatekernel(program, "clut_distributepixels", &errorstatus);         errorstatus = clsetkernelarg(k_clut_distributepixels, 0, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_distributepixels, 
1, sizeof(cl_mem), (void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_distributepixels, 2, sizeof(cl_mem), (void*)&h_cols);         errorstatus = clsetkernelarg(k_clut_distributepixels, 3, sizeof(cl_mem), (void*)&h_numcolors);      cl_kernel k_clut_checkdistances = clcreatekernel(program, "clut_checkdistances", &errorstatus);         errorstatus = clsetkernelarg(k_clut_checkdistances, 0, sizeof(cl_mem), (void*)&d_image);         errorstatus = clsetkernelarg(k_clut_checkdistances, 1, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_checkdistances, 2, sizeof(cl_mem), (void*)&d_grouprep);         errorstatus = clsetkernelarg(k_clut_checkdistances, 3, sizeof(cl_mem), (void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_checkdistances, 4, sizeof(cl_mem), (void*)&h_cols);         errorstatus = clsetkernelarg(k_clut_checkdistances, 5, sizeof(cl_mem), (void*)&h_numcolors);         errorstatus = clsetkernelarg(k_clut_checkdistances, 6, sizeof(cl_mem), (void*)&d_change);      cl_kernel k_clut_createimage = clcreatekernel(program, "clut_createimage", &errorstatus);         errorstatus = clsetkernelarg(k_clut_createimage, 0, sizeof(cl_mem), (void*)&d_clutimage);         errorstatus = clsetkernelarg(k_clut_createimage, 1, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_createimage, 2, sizeof(cl_mem), (void*)&d_grouprep);         errorstatus = clsetkernelarg(k_clut_createimage, 3, sizeof(cl_mem), (void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_createimage, 4, sizeof(cl_mem), (void*)&h_cols);      /************************************************             execute program , results      ************************************************/      /*step 1: evenly distribute pixels among colors in clut */     size_t grid[2] = {static_cast<size_t>(h_rows), static_cast<size_t>(h_cols)};     errorstatus = clenqueuendrangekernel(queue, k_clut_distributepixels, 2, null, grid, null, 0, null, 
null);     clfinish(queue);      /*********/     /* error */     /*********/     errorstatus = clenqueuereadbuffer(queue, d_pixelgroup, cl_true, 0, sizeof(int)*h_rows*h_cols, h_pixelgroup, 0, null, null);      std::cout << h_pixelgroup[7] << ", " << h_pixelgroup[8] << ", " << h_pixelgroup[9] << ", " << h_pixelgroup[10] << std::endl;      //do {         /*step 2: compute reprenstative */          /*step 3: compute distances , reassign pixel group */          //copyfromconstantmemory     //} while (h_change != 0);      std::cout << "done !!" << std::endl;      return 0; } 

I found the error. First of all: always check return values when learning new stuff. I remembered doing that when I was learning CUDA, so with a simple macro I started checking everything:

/*
 * Checks the result of an OpenCL call inside a function that returns an
 * OpenCL status code.
 *
 * `code` is evaluated exactly once, so an API call may be passed directly.
 * With assertions enabled a failure stops at the assert; with NDEBUG defined
 * the enclosing function returns the failing status instead.
 *
 * The do { ... } while (0) wrapper (with no trailing semicolon) makes the
 * macro behave as a single statement, so it is safe inside an unbraced
 * if/else.
 */
#define cl_success_or_return(code)                 \
    do {                                           \
        const cl_int cl_status_ = (code);          \
        assert(cl_status_ == CL_SUCCESS);          \
        if (cl_status_ != CL_SUCCESS) {            \
            return cl_status_;                     \
        }                                          \
    } while (0)

And the error was at the beginning, where I check whether the device is cpu or gpu: I forgot that strcmp returns 0 when the strings are equal. After fixing this, it worked beautifully!

Anyway, if you have any other suggestions or advice, or see anything ugly or not best practice in the code, please comment.


Comments

Popular posts from this blog

jquery - How can I dynamically add a browser tab? -

node.js - Getting the socket id,user id pair of a logged in user(s) -

keyboard - C++ GetAsyncKeyState alternative -