c++ - Issue with reading pixel RGB values in OpenCL -
I need to read the pixels of two parts of an image (with the same width and height), e.g. the squares ([0,0], [300,300]) and ([400,0], [700,300]), and compute the difference for each pixel.
This is the C (pseudo)code:
/**
 * Sums the per-channel differences between two 300x300 squares of an image.
 *
 * The first square starts at (0, 0); the second square starts at (pos, 0).
 *
 * @param img input image
 * @param pos integer position of the top left corner of the second square
 *            (in this case 400)
 * @return sum over the square of (r1 - r2) + (g1 - g2) + (b1 - b2)
 */
double getsum(image& img, int pos)
{
    const int width_of_cut = 300;
    rgb first, second;
    double ret_val = 0.0;

    for (int i = 0; i < width_of_cut; i++) {
        for (int j = 0; j < width_of_cut; j++) {
            first = img.getpixel(i, j);        /* pixel of the 1st square */
            second = img.getpixel(i + pos, j); /* same pixel, shifted by pos */
            ret_val += (first.r - second.r)
                     + (first.g - second.g)
                     + (first.b - second.b);
        }
    }
    return ret_val;
}
/* But the kernel (with the same arguments, and with __global float* output
 * set to 0.0 in the host code) is giving me different values: */
/* Sampler: unnormalized coordinates, clamp-to-edge addressing, nearest filtering. */
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                               CLK_ADDRESS_CLAMP_TO_EDGE |
                               CLK_FILTER_NEAREST;

/**
 * For the pixel at (get_global_id(0), get_global_id(1)), adds the per-channel
 * difference between that pixel and the pixel x_coord columns to its right
 * into output[get_global_id(0)].
 *
 * @param input   source 2D image
 * @param x_coord horizontal offset of the second square
 * @param output  accumulator array indexed by get_global_id(0)
 */
__kernel void getsum(__read_only image2d_t input,
                     const int x_coord,
                     __global float* output)
{
    const int2 pixelcoord = (int2)(get_global_id(0), get_global_id(1)); // image coordinates
    const int width_of_cut = 300;
    const int right_bottom = x_coord + width_of_cut;

    const int a = (int)(pixelcoord.x + x_coord);
    const int b = pixelcoord.y;

    if (a < right_bottom && b < width_of_cut) {
        float4 first = read_imagef(input, sampler, pixelcoord);
        float4 second = read_imagef(input, sampler, (int2)(a, b));
        /* NOTE(review): the index depends only on get_global_id(0), so
         * work-items that differ only in get_global_id(1) do a plain
         * (non-atomic) read-modify-write on the same element. */
        output[get_global_id(0)] += ((first.x - second.x)
                                   + (first.y - second.y)
                                   + (first.z - second.z));
    }
}
/* I am new to OpenCL and have no idea what I am doing wrong. */
Update (1D image):
I changed the kernel code. I'm now reading a 1D image in one loop, but I'm still not getting the correct values. I'm not sure I know how to read pixels from a 1D image correctly.
/**
 * 1D-image variant: each work-item walks width_of_cut consecutive pixels
 * starting at linear index img_width * get_global_id(0) and adds the
 * per-channel difference between each pixel and the pixel x_coord positions
 * after it into output[get_global_id(0)].
 *
 * @param input     source image, read through linear (1D) coordinates
 * @param x_coord   offset between a pixel and its counterpart
 * @param output    accumulator array, one element per work-item
 * @param img_width multiplier used to turn the work-item id into a linear offset
 */
__kernel void getsum(__read_only image1d_t input,
                     const int x_coord,
                     __global float* output,
                     const int img_width)
{
    const int width_of_cut = 300;
    int i = (int)(get_global_id(0));

    for (int j = 0; j < width_of_cut; j++) {
        int f = (img_width * i + j);
        int s = f + x_coord;
        float4 first = read_imagef(input, sampler, f);  // pixel of the 1st square
        float4 second = read_imagef(input, sampler, s); // pixel of the 2nd square
        output[get_global_id(0)] += ((first.x - second.x)
                                   + (first.y - second.y)
                                   + (first.z - second.z));
    }
}
Race condition.
All the vertical work-items are accessing the same output memory (output[get_global_id(0)] +=), and not atomically. Therefore the result is incorrect (e.g., two threads read the same value, add to it, and write it back; only one wins).
If your device supports it, you could make this an atomic operation, but it would be slow. You'd be better off running a 1D kernel that has a loop accumulating these vertically (i.e., the j loop in your C example).
Comments
Post a Comment