|
@@ -80,17 +80,21 @@ double * u;
|
|
@brief initializes the OpenCL context and detects available platforms
|
|
@brief initializes the OpenCL context and detects available platforms
|
|
@param use_gpu
|
|
@param use_gpu
|
|
**************************************************/
|
|
**************************************************/
|
|
-static int initialize(int use_gpu) {
|
|
|
|
|
|
+static int initialize(int platform_id, int device_id, int use_gpu) {
|
|
cl_int result;
|
|
cl_int result;
|
|
size_t size;
|
|
size_t size;
|
|
|
|
+
|
|
|
|
+ // Enumerate every OpenCL platform available on the system so that one can be selected by index (platform_id).
|
|
|
|
+ cl_uint platformCount;
|
|
|
|
|
|
// create OpenCL context
|
|
// create OpenCL context
|
|
- cl_platform_id platform_id[2];
|
|
|
|
- if (clGetPlatformIDs(2, platform_id, NULL) != CL_SUCCESS) {
|
|
|
|
- printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
|
|
|
|
- return -1;
|
|
|
|
- }
|
|
|
|
- cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id[1], 0};
|
|
|
|
|
|
+ clGetPlatformIDs(0, NULL, &platformCount);
|
|
|
|
+
|
|
|
|
+ cl_platform_id *platforms_ids;
|
|
|
|
+ platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
|
|
|
|
+ if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
|
|
|
|
+
|
|
|
|
+ cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms_ids[platform_id], 0};
|
|
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
|
|
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
|
|
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
|
|
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
|
|
if (!context) {
|
|
if (!context) {
|
|
@@ -118,7 +122,7 @@ static int initialize(int use_gpu) {
|
|
}
|
|
}
|
|
|
|
|
|
// create command queue for the selected device (device_id)
|
|
// create command queue for the selected device (device_id)
|
|
- cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
|
|
|
|
|
|
+ cmd_queue = clCreateCommandQueue(context, device_list[device_id], 0, NULL);
|
|
if (!cmd_queue) {
|
|
if (!cmd_queue) {
|
|
printf("ERROR: clCreateCommandQueue() failed\n");
|
|
printf("ERROR: clCreateCommandQueue() failed\n");
|
|
return -1;
|
|
return -1;
|
|
@@ -350,7 +354,7 @@ void getneighbors(int * se, int numOnes, double * neighbors, int radius) {
|
|
* @param Nfr The number of frames of the video
|
|
* @param Nfr The number of frames of the video
|
|
* @param seed The seed array used for number generation
|
|
* @param seed The seed array used for number generation
|
|
*/
|
|
*/
|
|
-void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed) {
|
|
|
|
|
|
+void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed, int platform_id, int device_id, int use_gpu) {
|
|
int k;
|
|
int k;
|
|
int max_size = IszX * IszY*Nfr;
|
|
int max_size = IszX * IszY*Nfr;
|
|
/*get object centers*/
|
|
/*get object centers*/
|
|
@@ -432,7 +436,7 @@ int findIndex(double * CDF, int lengthCDF, double value) {
|
|
* @param countOnes
|
|
* @param countOnes
|
|
*
|
|
*
|
|
*/
|
|
*/
|
|
-static int allocate(int Nparticles, int countOnes){
|
|
|
|
|
|
+static int allocate(int Nparticles, int countOnes, int platform_id, int device_id, int use_gpu){
|
|
/***** variables ******/
|
|
/***** variables ******/
|
|
|
|
|
|
int sourcesize = 1024 * 1024;
|
|
int sourcesize = 1024 * 1024;
|
|
@@ -453,8 +457,7 @@ static int allocate(int Nparticles, int countOnes){
|
|
fclose(fp);
|
|
fclose(fp);
|
|
|
|
|
|
// OpenCL initialization
|
|
// OpenCL initialization
|
|
- int use_gpu = 0;
|
|
|
|
- if (initialize(use_gpu)) return -1;
|
|
|
|
|
|
+ if (initialize(platform_id, device_id, use_gpu)) return -1;
|
|
|
|
|
|
// compile kernel
|
|
// compile kernel
|
|
cl_int err = 0;
|
|
cl_int err = 0;
|
|
@@ -552,7 +555,7 @@ static int allocate(int Nparticles, int countOnes){
|
|
* @param seed The seed array used for random number generation
|
|
* @param seed The seed array used for random number generation
|
|
* @param Nparticles The number of particles to be used
|
|
* @param Nparticles The number of particles to be used
|
|
*/
|
|
*/
|
|
-int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles) {
|
|
|
|
|
|
+int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles, int platform_id, int device_id, int use_gpu) {
|
|
int max_size = IszX * IszY*Nfr;
|
|
int max_size = IszX * IszY*Nfr;
|
|
long long start = get_time();
|
|
long long start = get_time();
|
|
//original particle centroid
|
|
//original particle centroid
|
|
@@ -587,7 +590,7 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
|
|
//initial likelihood to 0.0
|
|
//initial likelihood to 0.0
|
|
|
|
|
|
//allocate all of the memory for the computation
|
|
//allocate all of the memory for the computation
|
|
- allocate(Nparticles, countOnes);
|
|
|
|
|
|
+ allocate(Nparticles, countOnes, platform_id, device_id, use_gpu);
|
|
|
|
|
|
for (x = 0; x < Nparticles; x++) {
|
|
for (x = 0; x < Nparticles; x++) {
|
|
arrayX[x] = xe;
|
|
arrayX[x] = xe;
|
|
@@ -772,19 +775,25 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
|
|
|
|
|
|
int main(int argc, char * argv[]) {
|
|
int main(int argc, char * argv[]) {
|
|
|
|
|
|
- char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles>";
|
|
|
|
|
|
+ char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles> -p <platform_id> -d <device_id> -g <use_gpu>";
|
|
//check number of arguments
|
|
//check number of arguments
|
|
- if (argc != 9) {
|
|
|
|
|
|
+ if (argc != 15) {
|
|
printf("%s\n", usage);
|
|
printf("%s\n", usage);
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
//check argument delimiters
|
|
//check argument delimiters
|
|
- if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")) {
|
|
|
|
|
|
+ if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")
|
|
|
|
+ || strcmp(argv[9], "-p") || strcmp(argv[11], "-d") || strcmp(argv[13], "-g")) {
|
|
printf("%s\n", usage);
|
|
printf("%s\n", usage);
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
int IszX, IszY, Nfr, Nparticles;
|
|
int IszX, IszY, Nfr, Nparticles;
|
|
|
|
+
|
|
|
|
+ // Additional variables for platform and device selection
|
|
|
|
+ int platform_id = 0;
|
|
|
|
+ int device_id = 0;
|
|
|
|
+ int use_gpu = 0;
|
|
|
|
|
|
//convert a string to an integer
|
|
//convert a string to an integer
|
|
if (sscanf(argv[2], "%d", &IszX) == EOF) {
|
|
if (sscanf(argv[2], "%d", &IszX) == EOF) {
|
|
@@ -829,6 +838,22 @@ int main(int argc, char * argv[]) {
|
|
printf("Number of particles must be > 0\n");
|
|
printf("Number of particles must be > 0\n");
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ if (sscanf(argv[10], "%d", &platform_id) == EOF) {
|
|
|
|
+ printf("ERROR: platform_id is incorrect");
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (sscanf(argv[12], "%d", &device_id) == EOF) {
|
|
|
|
+ printf("ERROR: device_id is incorrect");
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (sscanf(argv[14], "%d", &use_gpu) == EOF) {
|
|
|
|
+ printf("ERROR: use_gpu is incorrect, r, 1 for GPU and 0 for CPU");
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+
|
|
//establish seed
|
|
//establish seed
|
|
int * seed = (int *) malloc(sizeof (int) *Nparticles);
|
|
int * seed = (int *) malloc(sizeof (int) *Nparticles);
|
|
int i;
|
|
int i;
|
|
@@ -838,11 +863,11 @@ int main(int argc, char * argv[]) {
|
|
int * I = (int *) malloc(sizeof (int) *IszX * IszY * Nfr);
|
|
int * I = (int *) malloc(sizeof (int) *IszX * IszY * Nfr);
|
|
long long start = get_time();
|
|
long long start = get_time();
|
|
//call video sequence
|
|
//call video sequence
|
|
- videoSequence(I, IszX, IszY, Nfr, seed);
|
|
|
|
|
|
+ videoSequence(I, IszX, IszY, Nfr, seed, platform_id, device_id, use_gpu);
|
|
long long endVideoSequence = get_time();
|
|
long long endVideoSequence = get_time();
|
|
printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence));
|
|
printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence));
|
|
//call particle filter
|
|
//call particle filter
|
|
- particleFilter(I, IszX, IszY, Nfr, seed, Nparticles);
|
|
|
|
|
|
+ particleFilter(I, IszX, IszY, Nfr, seed, Nparticles, platform_id, device_id, use_gpu);
|
|
long long endParticleFilter = get_time();
|
|
long long endParticleFilter = get_time();
|
|
printf("PARTICLE FILTER TOOK %f\n", elapsed_time(endVideoSequence, endParticleFilter));
|
|
printf("PARTICLE FILTER TOOK %f\n", elapsed_time(endVideoSequence, endParticleFilter));
|
|
printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter));
|
|
printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter));
|