123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707 |
- #define THREADS 256
- #define BOUNDARY_X 2
- int divRndUp(int n,
- int d)
- {
- return (n / d) + ((n % d) ? 1 : 0);
- }
- /* Store 3 RGB float components */
- /*void storeComponents(__global float *d_r, __global float *d_g, __global float *d_b, __global const float r, __global const float g, __global const float b, int pos)
- {
- d_r[pos] = (r/255.0f) - 0.5f;
- d_g[pos] = (g/255.0f) - 0.5f;
- d_b[pos] = (b/255.0f) - 0.5f;
- }
- */
- // Store 3 RGB intege components
- void storeComponents(__global int *d_r,
- __global int *d_g,
- __global int *d_b,
- int r,
- int g,
- int b,
- int pos)
- {
- d_r[pos] = r - 128;
- d_g[pos] = g - 128;
- d_b[pos] = b - 128;
- }
- /* Store float component */
- /*__kernel void storeComponent(__global float *d_c, __global const float c, int pos)
- {
- d_c[pos] = (c/255.0f) - 0.5f;
- }
- */
- // Store integer component
- void storeComponent(__global int *d_c,
- const int c,
- int pos)
- {
- d_c[pos] = c - 128;
- }
- // Copy img src data into three separated component buffers
- __kernel void c_CopySrcToComponents (__global int *d_r,
- __global int *d_g,
- __global int *d_b,
- __global unsigned char * cl_d_src,
- int pixels)
- {
- int x = get_local_id(0);
- int gX= get_local_size(0) * get_group_id(0);
-
- __local unsigned char sData[THREADS*3];
-
- // Copy data to shared mem by 4bytes
- // other checks are not necessary, since
- // cl_d_src buffer is aligned to sharedDataSize
- sData[3 * x + 0] = cl_d_src [gX * 3 + 3 * x + 0];
- sData[3 * x + 1] = cl_d_src [gX * 3 + 3 * x + 1];
- sData[3 * x + 2] = cl_d_src [gX * 3 + 3 * x + 2];
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int r, g, b;
- int offset = x*3;
- r = (int)(sData[offset]);
- g = (int)(sData[offset+1]);
- b = (int)(sData[offset+2]);
-
- int globalOutputPosition = gX + x;
- if (globalOutputPosition < pixels)
- {
- storeComponents(d_r, d_g, d_b, r, g, b, globalOutputPosition);
- }
- }
- // Copy img src data into three separated component buffers
- __kernel void c_CopySrcToComponent (__global int *d_c,
- __global unsigned char * cl_d_src,
- int pixels)
- {
- int x = get_local_id(0);
- int gX = get_local_size(0) * get_group_id(0);
-
- __local unsigned char sData[THREADS];
-
- sData[ x ] = cl_d_src [gX + x];
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int c;
-
- c = (int) (sData[x]);
-
- int globalOutputPosition = gX + x;
- if (globalOutputPosition < pixels)
- {
- storeComponent(d_c, c, globalOutputPosition);
- }
-
- }
- static void mirror( int *d,
- const int sizeD)
- {
- if ((*d )>= sizeD)
- {
- (*d) = 2 * sizeD -2 - (*d);
- } else if((*d) < 0)
- {
- (*d) = -(*d) ;
- }
- }
- struct VerticalDWTPixelIO
- {
- bool CHECKED;
- int end, stride;
- };
- int initialize_PixelIO(struct VerticalDWTPixelIO *pIO,
- bool CHECK,
- const int sizeX,
- const int sizeY,
- int firstX,
- int firstY)
- {
- pIO->CHECKED = CHECK;
- pIO->end = pIO->CHECKED ? (sizeY * sizeX + firstX) : 0 ;
- pIO->stride = sizeX;
- return firstX + sizeX * firstY;
- }
- struct VerticalDWTPixelLoader
- {
- bool CHECKED;
- int last;
- };
- void init_PixelLoader(struct VerticalDWTPixelLoader *loader,
- const int sizeX,
- const int sizeY,
- int firstX,
- const int firstY,
- struct VerticalDWTPixelIO *pIO,
- bool CHECK )
- {
- mirror (&firstX, sizeX);
- loader->last = initialize_PixelIO (pIO, CHECK, sizeX, sizeY, firstX, firstY) - sizeX;
- }
- void clear_PixelLoader(struct VerticalDWTPixelLoader *pLoader,
- struct VerticalDWTPixelIO *pIO)
- {
- pLoader->last = 0;
- pIO->end = 0 ;
- pIO->stride = 0 ;
- }
- int loadFrom(struct VerticalDWTPixelLoader *pLoader,
- __global const int * const input,
- struct VerticalDWTPixelIO *pIO,
- int CHECK)
- {
- pLoader->last += pIO->stride;
- if(CHECK && (pLoader->last == pIO->end))
- {
- pLoader->last -= 2 * pIO->stride;
- pIO->stride = 0 - pIO->stride;
- }
- return input[pLoader->last];
- }
- struct VerticalDWTBandIO
- {
- bool CHECKED;
- int end;
- int strideHighToLow;
- int strideLowToHigh;
- };
- int initialize_BandIO(struct VerticalDWTBandIO *bandIO,
- const int imageSizeX,
- const int imageSizeY,
- int firstX,
- int firstY)
- {
- int columnOffset = firstX / 2;
- int verticalStride;
-
- if(firstX & 1)
- {
- verticalStride = imageSizeX / 2;
- columnOffset += divRndUp(imageSizeX, 2) * divRndUp(imageSizeY, 2);
- bandIO->strideLowToHigh = (imageSizeX * imageSizeY) / 2;
- }
- else
- {
- verticalStride = imageSizeX / 2 + (imageSizeX & 1);
- bandIO->strideLowToHigh = divRndUp(imageSizeY, 2) * imageSizeX;
- }
-
- bandIO->strideHighToLow = verticalStride - bandIO->strideLowToHigh;
-
- if (bandIO->CHECKED)
- {
- bandIO->end = columnOffset + (imageSizeY / 2) * verticalStride + (imageSizeY & 1) * bandIO->strideLowToHigh ;
- }
- else
- {
- bandIO->end = 0;
- }
-
- return columnOffset + (firstY / 2) * verticalStride // right row
- + (firstY & 1) * bandIO->strideLowToHigh;
- }
- struct VerticalDWTBandLoader
- {
- bool CHECKED;
- int last;
- };
- struct VerticalDWTBandWriter
- {
- bool CHECKED;
- int next;
- };
- int saveAndUpdate(struct VerticalDWTBandWriter *writer,
- bool CHECK,
- struct VerticalDWTBandIO *bandIO,
- __global int * const output,
- __local int *item,
- int *stride)
- {
- writer->CHECKED = CHECK;
- if((!writer->CHECKED) || (writer->next != bandIO->end) )
- {
- output[writer->next] = *item;
- writer->next += *stride;
- }
- return writer->next;
- }
- void clear_BandWriter(struct VerticalDWTBandWriter *writer,
- struct VerticalDWTBandIO *bandIO)
- {
- bandIO->end = 0;
- bandIO->strideHighToLow = 0;
- bandIO->strideLowToHigh = 0;
- writer->next = 0 ;
- }
- void init_BandWriter(struct VerticalDWTBandWriter *writer,
- struct VerticalDWTBandIO *bandIO,
- const int imageSizeX,
- const int imageSizeY,
- const int firstX,
- const int firstY)
- {
- if (firstX < imageSizeX)
- {
- writer->next = initialize_BandIO (bandIO, imageSizeX, imageSizeY, firstX, firstY);
- }
- else
- {
- clear_BandWriter (writer , bandIO) ;
- }
- }
- int writeLowInto(struct VerticalDWTBandWriter *writer,
- struct VerticalDWTBandIO *bandIO,
- __global int * const output,
- __local int *primary)
- {
- return saveAndUpdate(writer, writer->CHECKED, bandIO, output, primary, &(bandIO->strideLowToHigh));
- }
- int writeHighInto(struct VerticalDWTBandWriter *writer, struct VerticalDWTBandIO *bandIO, __global int * const output, __local int *other)
- {
- return saveAndUpdate(writer, writer->CHECKED, bandIO, output, other, &(bandIO->strideHighToLow));
- }
- //TransformBuffer is contained in cuda_gwt/transform_buffer.h
- struct TransformBuffer
- {
- int SIZE_X, SIZE_Y;
- int VERTICAL_STRIDE;
- int SHM_BANKS, BUFFER_SIZE, PADDING, ODD_OFFSET;
-
- /// buffer for both even and odd columns
- int data[2182]; //data[2 * BUFFER_SIZE + PADDING]
- };
- void horizontalStep (__local struct TransformBuffer *buffer,
- const int count,
- const int prevOffset,
- const int midOffset,
- const int nextOffset,
- int flag)
- {
- const int STEPS = count / buffer->SIZE_X;
- const int finalCount = count % buffer->SIZE_X;
- const int finalOffset = count - finalCount;
- for(int i = 0; i< STEPS; i++)
- {
- const int previous = buffer->data[prevOffset + i * buffer->SIZE_X + get_local_id(0)] ;
- const int next = buffer->data[nextOffset + i * buffer->SIZE_X + get_local_id(0)];
- __local int * center = & (buffer->data[midOffset + i * buffer->SIZE_X + get_local_id(0)]);
- if (flag == 0)
- {
- *center -= (previous + next) /2; //Forward53Predict()
- } else if (flag == 1)
- {
- *center += (previous + next + 2) /4; //Forward53Update()
- }
- }
-
- if(get_local_id(0) < finalCount) {
- const int previous = buffer->data[prevOffset + finalOffset + get_local_id(0)];
- const int next = buffer->data[nextOffset + finalOffset + get_local_id(0)];
- __local int * center = & (buffer->data[midOffset + finalOffset + get_local_id(0)]);
-
- if (flag == 0)
- {
- *center -= (previous + next) /2; //Forward53Predict()
- } else if (flag == 1)
- {
- *center += (previous + next + 2) /4; //Forward53Update()
- }
- }
-
- }
- void forEachHorizontalOdd(__local struct TransformBuffer *buffer,
- const int firstLine,
- const int numLines,
- int flag)
- {
- const int count = numLines * buffer->VERTICAL_STRIDE - 1 ;
- const int prevOffset = firstLine * buffer->VERTICAL_STRIDE ;
- const int centerOffset = prevOffset + buffer->ODD_OFFSET ;
- const int nextOffset = prevOffset + 1;
-
- horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag);
- }
- void forEachHorizontalEven(__local struct TransformBuffer *buffer,
- const int firstLine,
- const int numLines,
- int flag)
- {
- const int count = numLines * buffer->VERTICAL_STRIDE - 1 ;
- const int centerOffset = firstLine * buffer->VERTICAL_STRIDE + 1;
- const int prevOffset = firstLine * buffer->VERTICAL_STRIDE + buffer->ODD_OFFSET;
- const int nextOffset = prevOffset + 1;
-
- horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag);
- }
- void forEachVerticalOdd (__local struct TransformBuffer *buffer,
- const int columnOffset,
- int flag)
- {
- int steps = (buffer->SIZE_Y - 1) / 2;
- for (int i = 0; i < steps; i++)
- {
- int row = i * 2 + 1;
- int prev = buffer->data[columnOffset+ (row - 1) * buffer->VERTICAL_STRIDE];
- int next = buffer->data[columnOffset+ (row + 1) * buffer->VERTICAL_STRIDE];
- if (flag == 0)
- {
- buffer->data[columnOffset + row * buffer->VERTICAL_STRIDE] -= (prev + next) /2;
- }
- else if (flag == 1)
- {
- //buffer->data[columnOffset + row * buffer->VERTICAL_STRIDE] += (prev + next + 2) /4;
- }
- }
- }
- void forEachVerticalEven (__local struct TransformBuffer *buffer,
- const int columnOffset,
- int flag)
- {
- int i ;
- if(buffer->SIZE_Y > 3)
- {
- int steps = (int)( buffer->SIZE_Y / 2) -1 ;
-
- for(i = 0; i < steps; i++)
- {
- int row = 2 + i * 2;
- int prev = buffer->data[columnOffset+ (row - 1) * buffer->VERTICAL_STRIDE];
- int next = buffer->data[columnOffset + (row + 1) * buffer->VERTICAL_STRIDE];
-
- if (flag == 0)
- {
- //buffer->data[columnOffset + row * buffer->VERTICAL_STRIDE] -= (prev + next) /2;
- }
- else if (flag == 1)
- {
- buffer->data[columnOffset + row * buffer->VERTICAL_STRIDE] += (prev + next + 2)/4; //real one
- }
-
- }
- }
- }
- struct FDWT53Column
- {
- bool CHECKED_LOADER;
- // loader for the column
- struct VerticalDWTPixelLoader loader;
- /// offset of the column in shared buffer
- int offset;
- // backup of first 3 loaded pixels (not transformed)
- int pixel0, pixel1, pixel2;
- };
- void clear_FDWT53Column(struct FDWT53Column *st_FDWT53Column,
- struct VerticalDWTPixelIO *pIO)
- {
- st_FDWT53Column->offset = 0;
- st_FDWT53Column->pixel0 = 0;
- st_FDWT53Column->pixel1 = 0;
- st_FDWT53Column->pixel2 = 0;
- clear_PixelLoader(&(st_FDWT53Column->loader), pIO);
- }
- struct FDWT53 {
- int WIN_SIZE_X, WIN_SIZE_Y;
- struct FDWT53Column column;
- /// Type of shared memory buffer for 5/3 FDWT transforms.
- /// Actual shared buffer used for forward 5/3 DWT.
- struct TransformBuffer buffer;
-
- /// Difference between indices of two vertical neighbors in buffer.
- int STRIDE;
- };
- //in from transform_buffer.h
- int getColumnOffset(int columnIndex,
- __local struct TransformBuffer * buffer)
- {
- columnIndex += BOUNDARY_X;
- return columnIndex / 2 // select right column
- + (columnIndex & 1) * buffer->ODD_OFFSET; // select odd or even buffer
- }
- void initColumn(__local struct FDWT53 * fdwt53,
- struct FDWT53Column *column,
- bool CHECKED,
- __global const int * const input,
- const int sizeX,
- const int sizeY,
- const int colIndex,
- const int firstY,
- struct VerticalDWTPixelIO *pIO)
- {
- column->CHECKED_LOADER = CHECKED;
- column->offset = getColumnOffset(colIndex, &fdwt53->buffer);
-
- const int firstX = get_group_id(0) * fdwt53->WIN_SIZE_X + colIndex;
- if(get_group_id(1) == 0)
- {
- // topmost block - apply mirroring rules when loading first 3 rows
- init_PixelLoader(&(column->loader), sizeX, sizeY, firstX, firstY, pIO, CHECKED);
- column->pixel2 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #0
- column->pixel1 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #1
- column->pixel0 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #2
- init_PixelLoader(&(column->loader), sizeX, sizeY, firstX, firstY + 1, pIO, CHECKED);
- }
- else
- {
- init_PixelLoader(&(column->loader), sizeX, sizeY, firstX, firstY - 2, pIO, CHECKED);
- column->pixel0 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #0
- column->pixel1 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #1
- column->pixel2 = loadFrom(&(column->loader),input, pIO, CHECKED); // loaded pixel #2
- }
-
- }
- void loadAndVerticallyTransform (__local struct FDWT53 *fdwt53,
- struct FDWT53Column *column,
- bool CHECKED,
- __global const int * const input,
- struct VerticalDWTPixelIO *pIO)
- {
- fdwt53->buffer.data[column->offset + 0 * fdwt53->STRIDE] = column->pixel0;
- fdwt53->buffer.data[column->offset + 1 * fdwt53->STRIDE] = column->pixel1;
- fdwt53->buffer.data[column->offset + 2 * fdwt53->STRIDE] = column->pixel2;
-
- for (int i = 3; i < (3 + fdwt53->WIN_SIZE_Y); i++)
- {
- fdwt53->buffer.data[column->offset + i * fdwt53->STRIDE] = loadFrom(&(column->loader),input, pIO, CHECKED);
- }
- column->pixel0 = fdwt53->buffer.data [column->offset + ( fdwt53->WIN_SIZE_Y + 0 ) * fdwt53->STRIDE] ;
- column->pixel1 = fdwt53->buffer.data [column->offset + ( fdwt53->WIN_SIZE_Y + 1 ) * fdwt53->STRIDE] ;
- column->pixel2 = fdwt53->buffer.data [column->offset + ( fdwt53->WIN_SIZE_Y + 2 ) * fdwt53->STRIDE] ;
-
-
- int flag = 0 ;
- forEachVerticalOdd (&fdwt53->buffer, column->offset, flag);
- flag = 1 ;
- forEachVerticalEven(&fdwt53->buffer, column->offset, flag);
- }
- void transform(__local struct FDWT53 *fdwt53,
- bool CHECK_LOADS,
- bool CHECK_WRITES,
- __global const int * const in,
- __global int * out,
- const int sizeX,
- const int sizeY,
- const int winSteps)
- {
- // info about one main and one boundary columns processed by this thread
- struct FDWT53Column column; column.CHECKED_LOADER = CHECK_LOADS;
- struct VerticalDWTPixelIO pIO;
- struct FDWT53Column boundaryColumn; boundaryColumn.CHECKED_LOADER = CHECK_LOADS;
- struct VerticalDWTPixelIO pIO_b;
- // Initialize all column info: initialize loaders, compute offset of
- // column in shared buffer and initialize loader of column.
- const int firstY = get_group_id(1) * fdwt53->WIN_SIZE_Y * winSteps;
- initColumn(fdwt53, &column, CHECK_LOADS, in, sizeX, sizeY, get_local_id(0), firstY, &pIO);
-
- // first 3 threads initialize boundary columns, others do not use them
- clear_FDWT53Column(&boundaryColumn, &pIO_b);
- if (get_local_id(0) < 3) {
- // index of boundary column (relative x-axis coordinate of the column)
- const int colId = get_local_id(0) + ((get_local_id(0)== 0) ? fdwt53->WIN_SIZE_X : -3);
-
- // initialize the column
- initColumn (fdwt53, &boundaryColumn, CHECK_LOADS, in, sizeX, sizeY, colId, firstY, &pIO_b);
- }
-
- // index of column which will be written into output by this thread
- const int outColumnIndex = (get_local_id(0) * 2) - (fdwt53->WIN_SIZE_X - 1) * (get_local_id(0) / ( fdwt53->WIN_SIZE_X / 2));
-
- // offset of column which will be written by this thread into output
- const int outColumnOffset = getColumnOffset(outColumnIndex, &(fdwt53->buffer));
-
- // initialize output writer for this thread
- const int outputFirstX = get_group_id(0) * fdwt53->WIN_SIZE_X +outColumnIndex;
- struct VerticalDWTBandWriter writer; writer.CHECKED = CHECK_WRITES;
- struct VerticalDWTBandIO bandIO; bandIO.CHECKED = CHECK_WRITES;
-
- init_BandWriter(&writer, &bandIO, sizeX, sizeY, outputFirstX, firstY);
-
- // Sliding window iterations:
- // Each iteration assumes that first 3 pixels of each column are loaded.
- for(int w = 0; w < winSteps; w++)
- {
- loadAndVerticallyTransform(fdwt53, &column, CHECK_LOADS, in, &pIO);
- if (get_local_id(0) < 3)
- {
- loadAndVerticallyTransform(fdwt53, &boundaryColumn, CHECK_LOADS, in, &pIO_b);
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- int flag = 0; //flag = 0 execute Forward53Predict, flag = 1 execute Forward53Update
- forEachHorizontalOdd(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag);
- barrier(CLK_LOCAL_MEM_FENCE);
- flag = 1;
- forEachHorizontalEven(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- for(int r = 2; r < (2+fdwt53->WIN_SIZE_Y); r+= 2)
- {
- writeLowInto(&writer, &bandIO, out, &(fdwt53->buffer.data[outColumnOffset + r * fdwt53->buffer.VERTICAL_STRIDE]));
- writeHighInto(&writer, &bandIO, out, &(fdwt53->buffer.data[outColumnOffset + (r+1) * fdwt53->buffer.VERTICAL_STRIDE]));
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- }
- // Forward 5/3 DWT predict operation.
- void Forward53Predict (const int p,
- __global int * c,
- const int n)
- {
- *c -= (p + n) /2;
- }
- // Forward 5/3 DWT update operation.
- void Forward53Update (const int p,
- __global int * c,
- const int n)
- {
- *c += (p + n + 2) /4;
- }
- __kernel void cl_fdwt53Kernel(__global const int * const in,
- __global int * out,
- const int sx,
- const int sy,
- const int steps,
- int WIN_SIZE_X,
- int WIN_SIZE_Y)
- {
- __local struct FDWT53 fdwt53;
- fdwt53.WIN_SIZE_X = WIN_SIZE_X;
- fdwt53.WIN_SIZE_Y = WIN_SIZE_Y;
-
- //initialize
- //Lingjie Zhang modified on 11/02/2015
- //for(int i = 0; i < sizeof(fdwt53.buffer)/sizeof(int); i++){
- for(int i = 0; i < sizeof(fdwt53.buffer.data)/sizeof(int); i++){
- fdwt53.buffer.data[i] = 0;
- }
- //end of Lingjie Zhang modification
-
- fdwt53.buffer.SIZE_X = fdwt53.WIN_SIZE_X;
- fdwt53.buffer.SIZE_Y = fdwt53.WIN_SIZE_Y + 3;
- fdwt53.buffer.VERTICAL_STRIDE = BOUNDARY_X + (fdwt53.buffer.SIZE_X / 2);//BOUNDARY = 2
- fdwt53.buffer.SHM_BANKS = 32; // SHM_BANKS = ((__CUDA_ARCH__ >= 200) ? 32 : 16)
- fdwt53.buffer.BUFFER_SIZE = fdwt53.buffer.VERTICAL_STRIDE * fdwt53.buffer.SIZE_Y;
- fdwt53.buffer.PADDING = fdwt53.buffer.SHM_BANKS - ((fdwt53.buffer.BUFFER_SIZE + fdwt53.buffer.SHM_BANKS / 2) % fdwt53.buffer.SHM_BANKS) ;
- fdwt53.buffer.ODD_OFFSET = fdwt53.buffer.BUFFER_SIZE + fdwt53.buffer.PADDING ;
- fdwt53.STRIDE = fdwt53.buffer.VERTICAL_STRIDE ;
- const int maxX = (get_group_id(0) + 1) * WIN_SIZE_X + 1;
- const int maxY = (get_group_id(1) + 1) * WIN_SIZE_Y * steps + 1;
- const bool atRightBoudary = maxX >= sx;
- const bool atBottomBoudary = maxY >= sy;
-
- // Select specialized version of code according to distance of this
- // threadblock's pixels from image boundary.
- if(atBottomBoudary)
- {
- // near bottom boundary => check both writing and reading
- transform(&fdwt53, true, true, in, out, sx, sy, steps);
- }
- else if(atRightBoudary)
- {
- // near right boundary only => check writing only
- transform(&fdwt53, false, true, in, out, sx, sy, steps);
- }
- else
- {
- // no nearby boundary => check nothing
- transform(&fdwt53, false, false, in, out, sx, sy, steps);
- }
- }
|