Continuing my OpenCL adventure: this is what I have so far, ported from my CUDA implementation. I was trying to check whether at least the first kernel call works, but I got error 48 and I have no idea what I am missing. I am following the example on this page.


/*
 * Initial assignment pass: one work-item per pixel of a c_cols x c_rows image.
 *
 * pixelgroup   - output, one int per pixel, stored row-major (row * c_cols + col)
 * c_rows       - image height in pixels
 * c_cols       - image width in pixels
 * c_numcolors  - divisor applied to the linear pixel index
 *
 * Dimension 0 of the NDRange is treated as the column and dimension 1 as the
 * row, so the host has to launch at least c_cols x c_rows work-items.
 *
 * NOTE(review): the stored value is (linear index / c_numcolors), which is not
 * bounded by c_numcolors - confirm this is the intended group numbering.
 */
__kernel void clut_distributepixels(__global int *pixelgroup, int c_rows, int c_cols, int c_numcolors)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    /* Work-items that fall outside the image have nothing to write. */
    if (col < c_cols && row < c_rows) {
        const int linear = row * c_cols + col;
        pixelgroup[linear] = linear / c_numcolors;
    }
}

This is the helper I use to read the kernel file:

/*
 * Reads the whole of `filename` into a freshly malloc'ed, NUL-terminated buffer.
 *
 * filename - path of the file to read (opened in text mode)
 * length   - out: number of bytes actually stored in the buffer, excluding the
 *            terminating NUL; only written on success
 *
 * Returns the buffer, which the caller must release with free(), or NULL when
 * the file cannot be opened, its size cannot be determined or does not fit in
 * an int, or memory cannot be allocated. A diagnostic is printed to stderr in
 * every failure case.
 */
char *file_contents(const char *filename, int *length){
    FILE *f = fopen(filename, "r");
    if (!f) {
        fprintf(stderr, "unable open %s reading\n", filename);
        return NULL;
    }

    /* Measure the file by seeking to its end; every step may fail (e.g. on a pipe). */
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        size = ftell(f);
    }
    /* The round-trip through int rejects sizes the int out-parameter cannot hold. */
    if (size < 0 || static_cast<long>(static_cast<int>(size)) != size || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "unable to determine size of %s\n", filename);
        fclose(f);
        return NULL;
    }

    char *buffer = static_cast<char *>(malloc(static_cast<size_t>(size) + 1));
    if (!buffer) {
        fprintf(stderr, "out of memory reading %s\n", filename);
        fclose(f);
        return NULL;
    }

    /* fread may return fewer bytes than ftell reported (text-mode newline
       translation), so the terminator goes after what was really read. */
    const size_t got = fread(buffer, 1, static_cast<size_t>(size), f);
    fclose(f);
    buffer[got] = '\0';
    *length = static_cast<int>(got);

    return buffer;
}


#include <iostream> #include <opencl/opencl.h>  #include "utilities.hpp"  int main(int argc, const char * argv[]){      if (argc < 3) {         std::cout << "use: {gpu|cpu} ncolors" << std::endl;         return 1;     }      /************************************************             host side initialization      ************************************************/     int h_numcolors = atoi(argv[2]);      color *h_image;     int h_rows, h_cols;     if (readtext2rgb("lenaoriginal.txt", &h_image, &h_rows, &h_cols) != success){         return 1;     }      int *h_pixelgroup = new int[h_rows*h_cols];     color *h_grouprep = new color[h_numcolors];     color *h_clutimage = new color[h_rows*h_cols];     int h_change = 0;      /************************************************                 platform , device setup     ************************************************/      cl_int errorstatus;      //use first platform     cl_platform_id platform;     errorstatus = clgetplatformids(1, &platform, null);      //use first device matches type selected     cl_device_id device;     if (strcmp(argv[1], "cpu")){         errorstatus = clgetdeviceids(platform, cl_device_type_cpu, 1, &device, null);     }else if (strcmp(argv[1], "gpu")){         errorstatus = clgetdeviceids(platform, cl_device_type_gpu, 1, &device, null);     }else{         std::cout << "unknown device type. 
choose either cpu or gpu" << std::endl;         return 1;     }      //define context properties , create context     cl_context_properties contextprops[3] = {cl_context_platform, (cl_context_properties)platform, 0};     cl_context context = clcreatecontext(contextprops, 1, &device, null, null, &errorstatus);      //create command queue     cl_command_queue queue = clcreatecommandqueue(context, device, 0, &errorstatus);      /************************************************                 device variable setup      ************************************************/      cl_mem d_image;     cl_mem d_pixelgroup;     cl_mem d_grouprep;     cl_mem d_clutimage;     cl_mem d_change;      d_image = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(color)*h_rows*h_cols, h_image, &errorstatus);     d_pixelgroup = clcreatebuffer(context, cl_mem_read_write, sizeof(int)*h_rows*h_cols, null, &errorstatus);     d_grouprep = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_numcolors, null, &errorstatus);     d_clutimage = clcreatebuffer(context, cl_mem_read_write, sizeof(color)*h_rows*h_cols, null, &errorstatus);     d_change = clcreatebuffer(context, cl_mem_write_only, sizeof(int), null, &errorstatus);      /************************************************         create, compile program , create kernel      ************************************************/      int pl;     size_t sourcelength;     char * sourcecode = file_contents("", &pl);     sourcelength = (size_t)pl;      cl_program program = clcreateprogramwithsource(context, 1, (const char**)&sourcecode, &sourcelength, &errorstatus);      errorstatus = clbuildprogram(program, 0, null, null, null, null);      cl_kernel k_clut_distributepixels = clcreatekernel(program, "clut_distributepixels", &errorstatus);         errorstatus = clsetkernelarg(k_clut_distributepixels, 0, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_distributepixels, 1, sizeof(cl_mem), 
(void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_distributepixels, 2, sizeof(cl_mem), (void*)&h_cols);         errorstatus = clsetkernelarg(k_clut_distributepixels, 3, sizeof(cl_mem), (void*)&h_numcolors);      cl_kernel k_clut_checkdistances = clcreatekernel(program, "clut_checkdistances", &errorstatus);         errorstatus = clsetkernelarg(k_clut_checkdistances, 0, sizeof(cl_mem), (void*)&d_image);         errorstatus = clsetkernelarg(k_clut_checkdistances, 1, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_checkdistances, 2, sizeof(cl_mem), (void*)&d_grouprep);         errorstatus = clsetkernelarg(k_clut_checkdistances, 3, sizeof(cl_mem), (void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_checkdistances, 4, sizeof(cl_mem), (void*)&h_cols);         errorstatus = clsetkernelarg(k_clut_checkdistances, 5, sizeof(cl_mem), (void*)&h_numcolors);         errorstatus = clsetkernelarg(k_clut_checkdistances, 6, sizeof(cl_mem), (void*)&d_change);      cl_kernel k_clut_createimage = clcreatekernel(program, "clut_createimage", &errorstatus);         errorstatus = clsetkernelarg(k_clut_createimage, 0, sizeof(cl_mem), (void*)&d_clutimage);         errorstatus = clsetkernelarg(k_clut_createimage, 1, sizeof(cl_mem), (void*)&d_pixelgroup);         errorstatus = clsetkernelarg(k_clut_createimage, 2, sizeof(cl_mem), (void*)&d_grouprep);         errorstatus = clsetkernelarg(k_clut_createimage, 3, sizeof(cl_mem), (void*)&h_rows);         errorstatus = clsetkernelarg(k_clut_createimage, 4, sizeof(cl_mem), (void*)&h_cols);      /************************************************             execute program , results      ************************************************/      /*step 1: evenly distribute pixels among colors in clut */     size_t grid[2] = {static_cast<size_t>(h_rows), static_cast<size_t>(h_cols)};     errorstatus = clenqueuendrangekernel(queue, k_clut_distributepixels, 2, null, grid, null, 0, null, null);     
clfinish(queue);      /*********/     /* error */     /*********/     errorstatus = clenqueuereadbuffer(queue, d_pixelgroup, cl_true, 0, sizeof(int)*h_rows*h_cols, h_pixelgroup, 0, null, null);      std::cout << h_pixelgroup[7] << ", " << h_pixelgroup[8] << ", " << h_pixelgroup[9] << ", " << h_pixelgroup[10] << std::endl;      //do {         /*step 2: compute reprenstative */          /*step 3: compute distances , reassign pixel group */          //copyfromconstantmemory     //} while (h_change != 0);      std::cout << "done !!" << std::endl;      return 0; } 

I found the error. First of all, always check return values when learning new stuff. I remembered that when I was learning CUDA I wrote a simple macro and started checking everything with it:

/*
 * Guard for OpenCL status codes inside a function that returns cl_int (or a
 * type cl_int converts to).
 *
 * `code` is evaluated exactly once, so an OpenCL call can be passed directly
 * without being repeated. In debug builds a failure trips the assert; with
 * NDEBUG defined the enclosing function returns the failing status instead.
 *
 * The do { } while (0) wrapper, without a trailing semicolon, makes the macro
 * behave as a single statement, so it is safe in an unbraced if/else.
 * Requires <cassert> and the OpenCL headers at the point of use.
 */
#define cl_success_or_return(code) do { \
    const cl_int cl_status_checked_ = (code); \
    assert(cl_status_checked_ == CL_SUCCESS); \
    if (cl_status_checked_ != CL_SUCCESS) { return cl_status_checked_; } \
} while (0)

The error was at the beginning, where I check whether to use the CPU or the GPU: I forgot that strcmp returns 0 when the strings are equal. After fixing this, it worked beautifully!

Anyway, if you have any other suggestions or advice, or see anything ugly or not best practice in the code, please comment.


