ソースを参照

Implemented device selection for particlefilter

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

I reused the available implementation of parameter parsing, adding the
needed options. I also changed the initialization of the OpenCL platforms
to use dynamic memory allocation instead of a statically allocated
array.
Andrea Gussoni 8 年 前
コミット
7b76a3c3e4

+ 44 - 19
opencl/particlefilter/ex_particle_OCL_naive_seq.cpp

@@ -80,17 +80,21 @@ double * u;
   @brief initializes the OpenCL context and detects available platforms
   @brief initializes the OpenCL context and detects available platforms
 @param use_gpu 
 @param use_gpu 
 **************************************************/
 **************************************************/
-static int initialize(int use_gpu) {
+static int initialize(int platform_id, int device_id, int use_gpu) {
     cl_int result;
     cl_int result;
     size_t size;
     size_t size;
+    
+    // modification to handle the case in which we have more than one OpenCL platform available on the system.
+  	cl_uint platformCount;
 
 
     // create OpenCL context
     // create OpenCL context
-    cl_platform_id platform_id[2];
-    if (clGetPlatformIDs(2, platform_id, NULL) != CL_SUCCESS) {
-        printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
-        return -1;
-    }
-    cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id[1], 0};
+    clGetPlatformIDs(0, NULL, &platformCount);
+  	
+  	cl_platform_id *platforms_ids;
+  	platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
+  	if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
+
+    cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms_ids[platform_id], 0};
     device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
     device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
     context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
     context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
     if (!context) {
     if (!context) {
@@ -118,7 +122,7 @@ static int initialize(int use_gpu) {
     }
     }
 
 
     // create command queue for the first device
     // create command queue for the first device
-    cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
+    cmd_queue = clCreateCommandQueue(context, device_list[device_id], 0, NULL);
     if (!cmd_queue) {
     if (!cmd_queue) {
         printf("ERROR: clCreateCommandQueue() failed\n");
         printf("ERROR: clCreateCommandQueue() failed\n");
         return -1;
         return -1;
@@ -350,7 +354,7 @@ void getneighbors(int * se, int numOnes, double * neighbors, int radius) {
  * @param Nfr The number of frames of the video
  * @param Nfr The number of frames of the video
  * @param seed The seed array used for number generation
  * @param seed The seed array used for number generation
  */
  */
-void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed) {
+void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed, int platform_id, int device_id, int use_gpu) {
     int k;
     int k;
     int max_size = IszX * IszY*Nfr;
     int max_size = IszX * IszY*Nfr;
     /*get object centers*/
     /*get object centers*/
@@ -432,7 +436,7 @@ int findIndex(double * CDF, int lengthCDF, double value) {
  * @param countOnes
  * @param countOnes
 *
 *
 */
 */
-static int allocate(int Nparticles, int countOnes){
+static int allocate(int Nparticles, int countOnes, int platform_id, int device_id, int use_gpu){
 	/***** variables ******/
 	/***** variables ******/
 
 
 	int sourcesize = 1024 * 1024;
 	int sourcesize = 1024 * 1024;
@@ -453,8 +457,7 @@ static int allocate(int Nparticles, int countOnes){
 	fclose(fp);
 	fclose(fp);
 
 
 	// OpenCL initialization
 	// OpenCL initialization
-	int use_gpu = 0;
-	if (initialize(use_gpu)) return -1;
+	if (initialize(platform_id, device_id, use_gpu)) return -1;
 
 
 	// compile kernel
 	// compile kernel
 	cl_int err = 0;
 	cl_int err = 0;
@@ -552,7 +555,7 @@ static int allocate(int Nparticles, int countOnes){
  * @param seed The seed array used for random number generation
  * @param seed The seed array used for random number generation
  * @param Nparticles The number of particles to be used
  * @param Nparticles The number of particles to be used
  */
  */
-int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles) {
+int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles, int platform_id, int device_id, int use_gpu) {
 	int max_size = IszX * IszY*Nfr;
 	int max_size = IszX * IszY*Nfr;
 	long long start = get_time();
 	long long start = get_time();
 	//original particle centroid
 	//original particle centroid
@@ -587,7 +590,7 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
 	//initial likelihood to 0.0
 	//initial likelihood to 0.0
 
 
 	//allocate all of the memory for the computation 
 	//allocate all of the memory for the computation 
-	allocate(Nparticles, countOnes);
+	allocate(Nparticles, countOnes, platform_id, device_id, use_gpu);
 	
 	
 	for (x = 0; x < Nparticles; x++) {
 	for (x = 0; x < Nparticles; x++) {
 		arrayX[x] = xe;
 		arrayX[x] = xe;
@@ -772,19 +775,25 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
 
 
 int main(int argc, char * argv[]) {
 int main(int argc, char * argv[]) {
 
 
-    char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles>";
+    char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles> -p <platform_id> -d <device_id> -g <use_gpu>";
     //check number of arguments
     //check number of arguments
-    if (argc != 9) {
+    if (argc != 15) {
         printf("%s\n", usage);
         printf("%s\n", usage);
         return 0;
         return 0;
     }
     }
     //check args deliminators
     //check args deliminators
-    if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")) {
+    if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")
+        || strcmp(argv[9], "-p") || strcmp(argv[11], "-d") || strcmp(argv[13], "-g")) {
         printf("%s\n", usage);
         printf("%s\n", usage);
         return 0;
         return 0;
     }
     }
 
 
     int IszX, IszY, Nfr, Nparticles;
     int IszX, IszY, Nfr, Nparticles;
+    
+    // Additional variables for platform and device selection
+    int platform_id = 0;
+    int device_id = 0;
+    int use_gpu = 0;
 
 
     //converting a string to a integer
     //converting a string to a integer
     if (sscanf(argv[2], "%d", &IszX) == EOF) {
     if (sscanf(argv[2], "%d", &IszX) == EOF) {
@@ -829,6 +838,22 @@ int main(int argc, char * argv[]) {
         printf("Number of particles must be > 0\n");
         printf("Number of particles must be > 0\n");
         return 0;
         return 0;
     }
     }
+    
+    if (sscanf(argv[10], "%d", &platform_id) == EOF) {
+        printf("ERROR: platform_id is incorrect");
+        return 0;
+    }
+    
+    if (sscanf(argv[12], "%d", &device_id) == EOF) {
+        printf("ERROR: device_id is incorrect");
+        return 0;
+    }
+    
+    if (sscanf(argv[14], "%d", &use_gpu) == EOF) {
+        printf("ERROR: use_gpu is incorrect, r, 1 for GPU and 0 for CPU");
+        return 0;
+    }
+    
     //establish seed
     //establish seed
     int * seed = (int *) malloc(sizeof (int) *Nparticles);
     int * seed = (int *) malloc(sizeof (int) *Nparticles);
     int i;
     int i;
@@ -838,11 +863,11 @@ int main(int argc, char * argv[]) {
     int * I = (int *) malloc(sizeof (int) *IszX * IszY * Nfr);
     int * I = (int *) malloc(sizeof (int) *IszX * IszY * Nfr);
     long long start = get_time();
     long long start = get_time();
     //call video sequence
     //call video sequence
-    videoSequence(I, IszX, IszY, Nfr, seed);
+    videoSequence(I, IszX, IszY, Nfr, seed, platform_id, device_id, use_gpu);
     long long endVideoSequence = get_time();
     long long endVideoSequence = get_time();
     printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence));
     printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence));
     //call particle filter
     //call particle filter
-    particleFilter(I, IszX, IszY, Nfr, seed, Nparticles);
+    particleFilter(I, IszX, IszY, Nfr, seed, Nparticles, platform_id, device_id, use_gpu);
     long long endParticleFilter = get_time();
     long long endParticleFilter = get_time();
     printf("PARTICLE FILTER TOOK %f\n", elapsed_time(endVideoSequence, endParticleFilter));
     printf("PARTICLE FILTER TOOK %f\n", elapsed_time(endVideoSequence, endParticleFilter));
     printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter));
     printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter));

+ 1 - 1
opencl/particlefilter/run-cpu

@@ -1 +1 @@
-./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000
+./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000 -p 1 -d 0 -g 0

+ 1 - 0
opencl/particlefilter/run-gpu

@@ -0,0 +1 @@
+./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000 -p 0 -d 0 -g 1