#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #include "components.h" #include "dwt.h" //using namespace std; #ifdef __APPLE__ #include #else #include #endif #define THREADS 256 struct dwt { char * srcFilename; char * outFilename; unsigned char *srcImg; int pixWidth; int pixHeight; int components; int dwtLvls; }; cl_context context = 0; cl_command_queue commandQueue = 0; cl_program program = 0; cl_device_id cldevice = 0; cl_kernel kernel = 0; cl_kernel c_CopySrcToComponents = 0; cl_kernel c_CopySrcToComponent = 0; cl_kernel kl_fdwt53Kernel; cl_mem memObjects[3] = { 0, 0, 0 }; cl_int errNum = 0; /// // functions for preparing create opencl program, contains CreateContext, CreateProgram, CreateCommandQueue, CreateMemBuffer, and Cleanup // Create an OpenCL context on the first available GPU platform. cl_context CreateContext(int platform_id, int use_gpu) { cl_context context = NULL; cl_uint platformIdCount = 0; cl_int errNum; cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; // get number of platforms clGetPlatformIDs (0, NULL, &platformIdCount); std::vector platformIds(platformIdCount); clGetPlatformIDs (platformIdCount, platformIds.data(), NULL); // In this example, first platform is a CPU, the second one is a GPU. we just choose the first available device. cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[platform_id], 0 }; context = clCreateContextFromType(contextProperties, device_type, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; return NULL; } return context; } /// // Create a command queue on the first device available on the context // cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *cldevice, int device_id) { cl_int errNum; cl_device_id *cldevices; cl_command_queue commandQueue = NULL; size_t deviceBufferSize = -1; // First get the size of the devices buffer errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize); if (errNum != CL_SUCCESS) { std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)"; return NULL; } if (deviceBufferSize <= 0) { std::cerr << "No devices available."; return NULL; } // Allocate memory for the devices buffer cldevices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)]; errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, cldevices, NULL); if (errNum != CL_SUCCESS) { delete [] cldevices; std::cerr << "Failed to get device IDs"; return NULL; } commandQueue = clCreateCommandQueue(context, cldevices[device_id], 0, NULL); if (commandQueue == NULL) { delete [] cldevices; std::cerr << "Failed to create commandQueue for device "; return NULL; } *cldevice = cldevices[device_id]; delete [] cldevices; return commandQueue; } /// // Create an OpenCL program from the kernel source file // cl_program CreateProgram(cl_context context, cl_device_id cldevice, const char* fileName) { cl_int errNum; cl_program program; std::ifstream kernelFile(fileName, std::ios::in); if (!kernelFile.is_open()) { std::cerr << "Failed to open file for reading: " << fileName << std::endl; return NULL; } std::ostringstream oss; oss << kernelFile.rdbuf(); std::string srcStdStr = oss.str(); const char *srcStr = srcStdStr.c_str(); program = clCreateProgramWithSource(context, 1, (const char**)&srcStr, NULL, NULL); if (program == NULL) { std::cerr << "Failed to create CL program from source." << std::endl; return NULL; } errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (errNum != CL_SUCCESS) { // Determine the reason for the error char buildLog[16384]; clGetProgramBuildInfo(program, cldevice, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL); std::cerr << "Error in kernel: " << std::endl; std::cerr << buildLog; clReleaseProgram(program); return NULL; } return program; } /// // Cleanup any created OpenCL resources // void Cleanup(cl_context context, cl_command_queue commandQueue, cl_program program, cl_kernel kernel) { if (commandQueue != 0) clReleaseCommandQueue(commandQueue); if (kernel != 0) clReleaseKernel(kernel); if (program != 0) clReleaseProgram(program); if (context != 0) clReleaseContext(context); } /// // Load the input image. // int getImg(char * srcFilename, unsigned char *srcImg, int inputSize) { // printf("Loading ipnput: %s\n", srcFilename); char path[] = "../../data/dwt2d/"; char *newSrc = NULL; if((newSrc = (char *)malloc(strlen(srcFilename)+strlen(path)+1)) != NULL) { newSrc[0] = '\0'; strcat(newSrc, path); strcat(newSrc, srcFilename); srcFilename= newSrc; } printf("Loading ipnput: %s\n", srcFilename); //read image int i = open(srcFilename, O_RDONLY, 0644); if (i == -1) { error(0,errno,"cannot access %s", srcFilename); return -1; } int ret = read(i, srcImg, inputSize); printf("precteno %d, inputsize %d\n", ret, inputSize); close(i); return 0; } /// //Show user how to use this program // void usage() { printf("dwt [otpions] src_img.rgb \n\ -d, --dimension\t\tdimensions of src img, e.g. 1920x1080\n\ -c, --components\t\tnumber of color components, default 3\n\ -b, --depth\t\t\tbit depth, default 8\n\ -l, --level\t\t\tDWT level, default 3\n\ -D, --device\t\t\tcuda device\n\ -f, --forward\t\t\tforward transform\n\ -r, --reverse\t\t\treverse transform\n\ -9, --97\t\t\t9/7 transform\n\ -5, --53\t\t\t5/3 transform\n\ -w --write-visual\t\twrite output in visual (tiled) fashion instead of the linear\n\ -p --platform_id\t\t\tOCL platform id to use\n\ -g --use_gpu\t\t\t1 to use gpu, 0 to use cpu\n\ -i --device_id\t\t\tOCL devicde id to use\n"); } /// // Check the type of error about opencl program // void fatal_CL(cl_int error, int line_no) { printf("At line %d: ", line_no); switch(error) { case CL_SUCCESS: printf("CL_SUCCESS\n"); break; case CL_DEVICE_NOT_FOUND: printf("CL_DEVICE_NOT_FOUND\n"); break; case CL_DEVICE_NOT_AVAILABLE: printf("CL_DEVICE_NOT_AVAILABLE\n"); break; case CL_COMPILER_NOT_AVAILABLE: printf("CL_COMPILER_NOT_AVAILABLE\n"); break; case CL_MEM_OBJECT_ALLOCATION_FAILURE: printf("CL_MEM_OBJECT_ALLOCATION_FAILURE\n"); break; case CL_OUT_OF_RESOURCES: printf("CL_OUT_OF_RESOURCES\n"); break; case CL_OUT_OF_HOST_MEMORY: printf("CL_OUT_OF_HOST_MEMORY\n"); break; case CL_PROFILING_INFO_NOT_AVAILABLE: printf("CL_PROFILING_INFO_NOT_AVAILABLE\n"); break; case CL_MEM_COPY_OVERLAP: printf("CL_MEM_COPY_OVERLAP\n"); break; case CL_IMAGE_FORMAT_MISMATCH: printf("CL_IMAGE_FORMAT_MISMATCH\n"); break; case CL_IMAGE_FORMAT_NOT_SUPPORTED: printf("CL_IMAGE_FORMAT_NOT_SUPPORTED\n"); break; case CL_BUILD_PROGRAM_FAILURE: printf("CL_BUILD_PROGRAM_FAILURE\n"); break; case CL_MAP_FAILURE: printf("CL_MAP_FAILURE\n"); break; case CL_INVALID_VALUE: printf("CL_INVALID_VALUE\n"); break; case CL_INVALID_DEVICE_TYPE: printf("CL_INVALID_DEVICE_TYPE\n"); break; case CL_INVALID_PLATFORM: printf("CL_INVALID_PLATFORM\n"); break; case CL_INVALID_DEVICE: printf("CL_INVALID_DEVICE\n"); break; case CL_INVALID_CONTEXT: printf("CL_INVALID_CONTEXT\n"); break; case CL_INVALID_QUEUE_PROPERTIES: printf("CL_INVALID_QUEUE_PROPERTIES\n"); break; case CL_INVALID_COMMAND_QUEUE: printf("CL_INVALID_COMMAND_QUEUE\n"); break; case CL_INVALID_HOST_PTR: printf("CL_INVALID_HOST_PTR\n"); break; case CL_INVALID_MEM_OBJECT: printf("CL_INVALID_MEM_OBJECT\n"); break; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: printf("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR\n"); break; case CL_INVALID_IMAGE_SIZE: printf("CL_INVALID_IMAGE_SIZE\n"); break; case CL_INVALID_SAMPLER: printf("CL_INVALID_SAMPLER\n"); break; case CL_INVALID_BINARY: printf("CL_INVALID_BINARY\n"); break; case CL_INVALID_BUILD_OPTIONS: printf("CL_INVALID_BUILD_OPTIONS\n"); break; case CL_INVALID_PROGRAM: printf("CL_INVALID_PROGRAM\n"); break; case CL_INVALID_PROGRAM_EXECUTABLE: printf("CL_INVALID_PROGRAM_EXECUTABLE\n"); break; case CL_INVALID_KERNEL_NAME: printf("CL_INVALID_KERNEL_NAME\n"); break; case CL_INVALID_KERNEL_DEFINITION: printf("CL_INVALID_KERNEL_DEFINITION\n"); break; case CL_INVALID_KERNEL: printf("CL_INVALID_KERNEL\n"); break; case CL_INVALID_ARG_INDEX: printf("CL_INVALID_ARG_INDEX\n"); break; case CL_INVALID_ARG_VALUE: printf("CL_INVALID_ARG_VALUE\n"); break; case CL_INVALID_ARG_SIZE: printf("CL_INVALID_ARG_SIZE\n"); break; case CL_INVALID_KERNEL_ARGS: printf("CL_INVALID_KERNEL_ARGS\n"); break; case CL_INVALID_WORK_DIMENSION: printf("CL_INVALID_WORK_DIMENSION\n"); break; case CL_INVALID_WORK_GROUP_SIZE: printf("CL_INVALID_WORK_GROUP_SIZE\n"); break; case CL_INVALID_WORK_ITEM_SIZE: printf("CL_INVALID_WORK_ITEM_SIZE\n"); break; case CL_INVALID_GLOBAL_OFFSET: printf("CL_INVALID_GLOBAL_OFFSET\n"); break; case CL_INVALID_EVENT_WAIT_LIST: printf("CL_INVALID_EVENT_WAIT_LIST\n"); break; case CL_INVALID_EVENT: printf("CL_INVALID_EVENT\n"); break; case CL_INVALID_OPERATION: printf("CL_INVALID_OPERATION\n"); break; case CL_INVALID_GL_OBJECT: printf("CL_INVALID_GL_OBJECT\n"); break; case CL_INVALID_BUFFER_SIZE: printf("CL_INVALID_BUFFER_SIZE\n"); break; case CL_INVALID_MIP_LEVEL: printf("CL_INVALID_MIP_LEVEL\n"); break; case CL_INVALID_GLOBAL_WORK_SIZE: printf("CL_INVALID_GLOBAL_WORK_SIZE\n"); break; #ifdef CL_VERSION_1_1 case CL_MISALIGNED_SUB_BUFFER_OFFSET: printf("CL_MISALIGNED_SUB_BUFFER_OFFSET\n"); break; case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: printf("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST\n"); break; #endif default: printf("Invalid OpenCL error code\n"); } } /// // Separate compoents of 8bit RGB source image //in file components.cu void rgbToComponents(cl_mem d_r, cl_mem d_g, cl_mem d_b, unsigned char * h_src, int width, int height) { int pixels = width * height; int alignedSize = DIVANDRND(width*height, THREADS) * THREADS * 3; //aligned to thread block size -- THREADS cl_mem cl_d_src; cl_d_src = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, pixels*3, h_src, &errNum); // fatal_CL(errNum, __LINE__); size_t globalWorkSize[1] = { alignedSize/3}; size_t localWorkSize[1] = { THREADS }; errNum = clSetKernelArg(c_CopySrcToComponents, 0, sizeof(cl_mem), &d_r); errNum |= clSetKernelArg(c_CopySrcToComponents, 1, sizeof(cl_mem), &d_g); errNum |= clSetKernelArg(c_CopySrcToComponents, 2, sizeof(cl_mem), &d_b); errNum |= clSetKernelArg(c_CopySrcToComponents, 3, sizeof(cl_mem), &cl_d_src); errNum |= clSetKernelArg(c_CopySrcToComponents, 4, sizeof(int), &pixels); // fatal_CL(errNum, __LINE__); errNum = clEnqueueNDRangeKernel(commandQueue, c_CopySrcToComponents, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); // fatal_CL(errNum, __LINE__); // Free Memory errNum = clReleaseMemObject(cl_d_src); // fatal_CL(errNum, __LINE__); } /// // Copy a 8bit source image data into a color compoment //in file components.cu void bwToComponent(cl_mem d_c, unsigned char * h_src, int width, int height) { cl_mem cl_d_src; int pixels = width*height; int alignedSize = DIVANDRND(pixels, THREADS) * THREADS; cl_d_src = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, pixels, h_src, NULL); // fatal_CL(errNum, __LINE__); size_t globalWorkSize[1] = { alignedSize/9}; size_t localWorkSize[1] = { THREADS }; assert(alignedSize%(THREADS*3) == 0); errNum = clSetKernelArg(c_CopySrcToComponent, 0, sizeof(cl_mem), &d_c); errNum |= clSetKernelArg(c_CopySrcToComponent, 1, sizeof(cl_mem), &cl_d_src); errNum |= clSetKernelArg(c_CopySrcToComponent, 2, sizeof(int), &pixels); // fatal_CL(errNum, __LINE__); errNum = clEnqueueNDRangeKernel(commandQueue, c_CopySrcToComponent, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); std::cout<<"in function bwToComponent errNum= "<= 960) { launchFDWT53Kernel(192, 8, in, out, sizeX, sizeY); } else if (sizeX >= 480) { launchFDWT53Kernel(128, 8, in, out, sizeX, sizeY); } else { launchFDWT53Kernel(64, 8, in, out, sizeX, sizeY); } // if this was not the last level, continue recursively with other levels if (levels > 1) { // copy output's LL band back into input buffer const int llSizeX = (sizeX / 2) + ((sizeX % 2) ? 1 :0); const int llSizeY = (sizeY / 2) + ((sizeY % 2) ? 1 :0); memCopy(in, out, llSizeX, llSizeY); // run remaining levels of FDWT fdwt53(in, out, llSizeX, llSizeY, levels - 1); } } /// // in dwt.cu int nStage2dDWT(cl_mem in, cl_mem out, cl_mem backup, int pixWidth, int pixHeight, int stages, bool forward) { printf("\n*** %d stages of 2D forward DWT:\n", stages); // create backup of input, because each test iteration overwrites it const int size = pixHeight * pixWidth * sizeof(int); // Measure time of individual levels. if (forward) fdwt53(in, out, pixWidth, pixHeight, stages ); //else // rdwt(in, out, pixWidth, pixHeight, stages); // rdwt means rdwt53(can be found in file rdwt53.cu) which has not been defined return 0; } /// //in file dwt.cu void samplesToChar(unsigned char * dst, int * src, int samplesNum) { int i; for(i = 0; i < samplesNum; i++) { int r = src[i]+128; if (r > 255) r = 255; if (r < 0) r = 0; dst[i] = (unsigned char)r; } } /// //in file dwt.cu /// Write output linear orderd int writeLinear(cl_mem component, int pixWidth, int pixHeight, const char * filename, const char * suffix) { unsigned char * result; int *gpu_output; int i; int size; int samplesNum = pixWidth*pixHeight; size = samplesNum*sizeof(int); gpu_output = (int *)malloc(size); memset(gpu_output, 0, size); result = (unsigned char *)malloc(samplesNum); errNum = clEnqueueReadBuffer(commandQueue, component, CL_TRUE, 0, size, gpu_output, 0, NULL, NULL); // fatal_CL(errNum, __LINE__); // T to char samplesToChar(result, gpu_output, samplesNum); // Write component char outfile[strlen(filename)+strlen(suffix)]; strcpy(outfile, filename); strcpy(outfile+strlen(filename), suffix); i = open(outfile, O_CREAT|O_WRONLY, 0644); if (i == -1) { error(0,errno,"cannot access %s", outfile); return -1; } printf("\nWriting to %s (%d x %d)\n", outfile, pixWidth, pixHeight); write(i, result, samplesNum); close(i); // Clean up free(gpu_output); free(result); return 0; } /// // Write output visual ordered //in file dwt.cu int writeNStage2DDWT(cl_mem component, int pixWidth, int pixHeight, int stages, const char * filename, const char * suffix) { struct band { int dimX; int dimY; }; struct dimensions { struct band LL; struct band HL; struct band LH; struct band HH; }; unsigned char * result; int *src; int *dst; int i,s; int size; int offset; int yOffset; int samplesNum = pixWidth*pixHeight; struct dimensions * bandDims; bandDims = (struct dimensions *)malloc(stages * sizeof(struct dimensions)); bandDims[0].LL.dimX = DIVANDRND(pixWidth,2); bandDims[0].LL.dimY = DIVANDRND(pixHeight,2); bandDims[0].HL.dimX = pixWidth - bandDims[0].LL.dimX; bandDims[0].HL.dimY = bandDims[0].LL.dimY; bandDims[0].LH.dimX = bandDims[0].LL.dimX; bandDims[0].LH.dimY = pixHeight - bandDims[0].LL.dimY; bandDims[0].HH.dimX = bandDims[0].HL.dimX; bandDims[0].HH.dimY = bandDims[0].LH.dimY; for (i = 1; i < stages; i++) { bandDims[i].LL.dimX = DIVANDRND(bandDims[i-1].LL.dimX,2); bandDims[i].LL.dimY = DIVANDRND(bandDims[i-1].LL.dimY,2); bandDims[i].HL.dimX = bandDims[i-1].LL.dimX - bandDims[i].LL.dimX; bandDims[i].HL.dimY = bandDims[i].LL.dimY; bandDims[i].LH.dimX = bandDims[i].LL.dimX; bandDims[i].LH.dimY = bandDims[i-1].LL.dimY - bandDims[i].LL.dimY; bandDims[i].HH.dimX = bandDims[i].HL.dimX; bandDims[i].HH.dimY = bandDims[i].LH.dimY; } #if 0 printf("Original image pixWidth x pixHeight: %d x %d\n", pixWidth, pixHeight); for (i = 0; i < stages; i++) { printf("Stage %d: LL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LL.dimX, bandDims[i].LL.dimY); printf("Stage %d: HL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HL.dimX, bandDims[i].HL.dimY); printf("Stage %d: LH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LH.dimX, bandDims[i].LH.dimY); printf("Stage %d: HH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HH.dimX, bandDims[i].HH.dimY); } #endif size = samplesNum*sizeof(int); src = (int *)malloc(size); memset(src, 0, size); dst = (int *)malloc(size); memset(dst, 0, size); result = (unsigned char *)malloc(samplesNum); errNum = clEnqueueReadBuffer(commandQueue, component, CL_TRUE, 0, size, src, 0, NULL, NULL); // fatal_CL(errNum, __LINE__); // LL Band size = bandDims[stages-1].LL.dimX * sizeof(int); for (i = 0; i < bandDims[stages-1].LL.dimY; i++) { memcpy(dst+i*pixWidth, src+i*bandDims[stages-1].LL.dimX, size); } for (s = stages - 1; s >= 0; s--) { // HL Band size = bandDims[s].HL.dimX * sizeof(int); offset = bandDims[s].LL.dimX * bandDims[s].LL.dimY; for (i = 0; i < bandDims[s].HL.dimY; i++) { memcpy(dst+i*pixWidth+bandDims[s].LL.dimX, src+offset+i*bandDims[s].HL.dimX, size); } // LH band size = bandDims[s].LH.dimX * sizeof(int); offset += bandDims[s].HL.dimX * bandDims[s].HL.dimY; yOffset = bandDims[s].LL.dimY; for (i = 0; i < bandDims[s].HL.dimY; i++) { memcpy(dst+(yOffset+i)*pixWidth, src+offset+i*bandDims[s].LH.dimX, size); } //HH band size = bandDims[s].HH.dimX * sizeof(int); offset += bandDims[s].LH.dimX * bandDims[s].LH.dimY; yOffset = bandDims[s].HL.dimY; for (i = 0; i < bandDims[s].HH.dimY; i++) { memcpy(dst+(yOffset+i)*pixWidth+bandDims[s].LH.dimX, src+offset+i*bandDims[s].HH.dimX, size); } } // Write component samplesToChar(result, dst, samplesNum); char outfile[strlen(filename)+strlen(suffix)]; strcpy(outfile, filename); strcpy(outfile+strlen(filename), suffix); i = open(outfile, O_CREAT|O_WRONLY, 0644); if (i == -1) { error(0,errno,"cannot access %s", outfile); return -1; } printf("\nWriting to %s (%d x %d)\n", outfile, pixWidth, pixHeight); write(i, result, samplesNum); close(i); free(src); free(dst); free(result); free(bandDims); return 0; } /// // Process of DWT algorithm // template void processDWT(struct dwt *d, int forward, int writeVisual) { int componentSize = d->pixWidth * d->pixHeight * sizeof(T); T *c_r_out, *c_g_out, *c_b_out, *backup, *c_r, *c_g, *c_b; // initialize to zeros T *temp = (T *)malloc(componentSize); memset(temp, 0, componentSize); cl_mem cl_c_r_out; cl_c_r_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); cl_mem cl_backup; cl_backup = clCreateBuffer(context, CL_MEM_READ_WRITE |CL_MEM_COPY_HOST_PTR, componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); if (d->components == 3) { // Alloc two more buffers for G and B cl_mem cl_c_g_out; cl_c_g_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); cl_mem cl_c_b_out; cl_c_b_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); // Load components cl_mem cl_c_r, cl_c_g, cl_c_b; cl_c_r = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); cl_c_g = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); cl_c_b = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); rgbToComponents(cl_c_r, cl_c_g, cl_c_b, d->srcImg, d->pixWidth, d->pixHeight); //Compute DWT and always store int file nStage2dDWT(cl_c_r, cl_c_r_out, cl_backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); nStage2dDWT(cl_c_g, cl_c_g_out, cl_backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); nStage2dDWT(cl_c_b, cl_c_b_out, cl_backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); // ---------test---------- /* T *h_r_out=(T*)malloc(componentSize); errNum = clEnqueueReadBuffer(commandQueue, cl_c_g_out, CL_TRUE, 0, componentSize, h_r_out, 0, NULL, NULL); fatal_CL(errNum, __LINE__); int ii; for(ii=0;iipixWidth) == 0) fprintf(stderr, "\n"); } */ // ---------test---------- #ifdef OUTPUT // Store DWT to file if(writeVisual){ writeNStage2DDWT(cl_c_r_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".r"); writeNStage2DDWT(cl_c_g_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".g"); writeNStage2DDWT(cl_c_b_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".b"); } else { writeLinear(cl_c_r_out, d->pixWidth, d->pixHeight, d->outFilename, ".r"); writeLinear(cl_c_g_out, d->pixWidth, d->pixHeight, d->outFilename, ".g"); writeLinear(cl_c_b_out, d->pixWidth, d->pixHeight, d->outFilename, ".b"); } #endif clReleaseMemObject(cl_c_r); clReleaseMemObject(cl_c_g); clReleaseMemObject(cl_c_b); clReleaseMemObject(cl_c_g_out); clReleaseMemObject(cl_c_b_out); } else if(d->components == 1) { // Load components cl_mem cl_c_r; cl_c_r = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,componentSize, temp, &errNum); // fatal_CL(errNum, __LINE__); bwToComponent(cl_c_r, d->srcImg, d->pixWidth, d->pixHeight); // Compute DWT nStage2dDWT(cl_c_r, cl_c_r_out, cl_backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); //Store DWT to file if(writeVisual){ writeNStage2DDWT(cl_c_r_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".r"); } else { writeLinear(cl_c_r_out, d->pixWidth, d->pixHeight, d->outFilename, ".r"); } clReleaseMemObject(cl_c_r); } free(temp); clReleaseMemObject(cl_c_r_out); } int main(int argc, char **argv) { int optindex = 0; signed char ch; struct option longopts[] = { {"dimension", required_argument, 0, 'd'}, //dimensions of src img {"components", required_argument, 0, 'c'}, //numger of components of src img {"depth", required_argument, 0, 'b'}, //bit depth of src img {"level", required_argument, 0, 'l'}, //level of dwt {"device", required_argument, 0, 'D'}, //cuda device {"forward", no_argument, 0, 'f'}, //forward transform {"reverse", no_argument, 0, 'r'}, //forward transform {"97", no_argument, 0, '9'}, //9/7 transform {"53", no_argument, 0, '5' }, //5/3transform {"write-visual",no_argument, 0, 'w' }, //write output (subbands) in visual (tiled) order instead of linear {"help", no_argument, 0, 'h'}, {"platform_id", required_argument, 0, 'p'}, {"use_gpu", required_argument, 0, 'g'}, {"device_id", required_argument, 0, 'i'} }; int pixWidth = 0; //= strlen(optarg))) { usage(); return -1; } pixHeight = atoi(pos+1); break; case 'c': compCount = atoi(optarg); break; case 'b': bitDepth = atoi(optarg); break; case 'l': dwtLvls = atoi(optarg); break; case 'D': device = atoi(optarg); break; case 'f': forward = 1; break; case 'r': forward = 0; break; case '9': dwt97 = 1; break; case '5': dwt97 = 0; break; case 'w': writeVisual = 1; break; case 'p': platform_id = atoi(optarg); break; case 'g': use_gpu = atoi(optarg); break; case 'i': device_id = atoi(optarg); break; case 'h': usage(); return 0; case '?': return -1; default : usage(); return -1; } } argc -= optind; argv += optind; if (argc == 0) { // at least one filename is expected printf("Please supply src file name\n"); usage(); return -1; } if (pixWidth <= 0 || pixHeight <=0) { printf("Wrong or missing dimensions\n"); usage(); return -1; } if (forward == 0) { writeVisual = 0; //do not write visual when RDWT } // // device init // Create an OpenCL context on first available platform context = CreateContext(platform_id, use_gpu); if (context == NULL) { std::cerr << "Failed to create OpenCL context." << std::endl; return 1; } // Create a command-queue on the first device available // on the created context commandQueue = CreateCommandQueue(context, &cldevice, device_id); if (commandQueue == NULL) { Cleanup(context, commandQueue, program, kernel); return 1; } // Create OpenCL program from com_dwt.cl kernel source program = CreateProgram(context, cldevice, "com_dwt.cl"); if (program == NULL) { printf("fail to create program!!\n"); } // Create OpenCL kernel c_CopySrcToComponents = clCreateKernel(program, "c_CopySrcToComponents", NULL); if (c_CopySrcToComponents == NULL) { std::cerr << "Failed to create kernel" << std::endl; } c_CopySrcToComponent = clCreateKernel(program, "c_CopySrcToComponent", NULL); if (c_CopySrcToComponent == NULL) { std::cerr << "Failed to create kernel" << std::endl; } kl_fdwt53Kernel = clCreateKernel(program, "cl_fdwt53Kernel", NULL); if (kl_fdwt53Kernel == NULL) { std::cerr<<"Failed to create kernel\n"; } //initialize struct dwt struct dwt *d; d = (struct dwt *)malloc(sizeof(struct dwt)); d->srcImg = NULL; d->pixWidth = pixWidth; d->pixHeight = pixHeight; d->components = compCount; d->dwtLvls = dwtLvls; // file names d->srcFilename = (char *)malloc(strlen(argv[0])); strcpy(d->srcFilename, argv[0]); if (argc == 1) { // only one filename supplyed d->outFilename = (char *)malloc(strlen(d->srcFilename)+4); strcpy(d->outFilename, d->srcFilename); strcpy(d->outFilename+strlen(d->srcFilename), ".dwt"); } else { d->outFilename = strdup(argv[1]); } //Input review printf("\nSource file:\t\t%s\n", d->srcFilename); printf(" Dimensions:\t\t%dx%d\n", pixWidth, pixHeight); printf(" Components count:\t%d\n", compCount); printf(" Bit depth:\t\t%d\n", bitDepth); printf(" DWT levels:\t\t%d\n", dwtLvls); printf(" Forward transform:\t%d\n", forward); printf(" 9/7 transform:\t\t%d\n", dwt97); //data sizes int inputSize = pixWidth*pixHeight*compCount; //srcImg = (unsigned char *) malloc (inputSize); if (getImg(d->srcFilename, d->srcImg, inputSize) == -1) return -1; // DWT // Create memory objects, Set arguments for kernel functions, Queue the kernel up for execution across the array, Read the output buffer back to the Host, Output the result buffer if (forward == 1) { if(dwt97 == 1 ) processDWT(d, forward, writeVisual); else // 5/3 processDWT(d, forward, writeVisual); } else { // reverse if(dwt97 == 1 ) processDWT(d, forward, writeVisual); else // 5/3 processDWT(d, forward, writeVisual); } Cleanup(context, commandQueue, program, kernel); clReleaseKernel(c_CopySrcToComponents); clReleaseKernel(c_CopySrcToComponent); return 0; }