// statistical kernel __global__ void compress( long d_Ne, fp *d_I){ // pointer to output image (DEVICE GLOBAL MEMORY) // indexes int bx = blockIdx.x; // get current horizontal block index int tx = threadIdx.x; // get current horizontal thread index int ei = (bx*NUMBER_THREADS)+tx; // unique thread id, more threads than actual elements !!! // copy input to output & log uncompress if(ei