ソースを参照

Implemented device selection for particlefilter

We now use parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we
can easily change the type of the benchmark without recompiling it.

I reused the existing parameter-parsing implementation, adding the
needed options. I also changed the initialization of the OpenCL
platforms to use dynamic memory allocation instead of a statically
allocated array.
Andrea Gussoni 8 年 前
コミット
7b76a3c3e4

+ 44 - 19
opencl/particlefilter/ex_particle_OCL_naive_seq.cpp

@@ -80,17 +80,21 @@ double * u;
   @brief initializes the OpenCL context and detects available platforms
 @param use_gpu 
 **************************************************/
-static int initialize(int use_gpu) {
+static int initialize(int platform_id, int device_id, int use_gpu) {
     cl_int result;
     size_t size;
+    
+    // modification to handle the case in which we have more than one OpenCL platform available on the system.
+  	cl_uint platformCount;
 
     // create OpenCL context
-    cl_platform_id platform_id[2];
-    if (clGetPlatformIDs(2, platform_id, NULL) != CL_SUCCESS) {
-        printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
-        return -1;
-    }
-    cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id[1], 0};
+    clGetPlatformIDs(0, NULL, &platformCount);
+  	
+  	cl_platform_id *platforms_ids;
+  	platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
+  	if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
+
+    cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms_ids[platform_id], 0};
     device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
     context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
     if (!context) {
@@ -118,7 +122,7 @@ static int initialize(int use_gpu) {
     }
 
     // create command queue for the first device
-    cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
+    cmd_queue = clCreateCommandQueue(context, device_list[device_id], 0, NULL);
     if (!cmd_queue) {
         printf("ERROR: clCreateCommandQueue() failed\n");
         return -1;
@@ -350,7 +354,7 @@ void getneighbors(int * se, int numOnes, double * neighbors, int radius) {
  * @param Nfr The number of frames of the video
  * @param seed The seed array used for number generation
  */
-void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed) {
+void videoSequence(int * I, int IszX, int IszY, int Nfr, int * seed, int platform_id, int device_id, int use_gpu) {
     int k;
     int max_size = IszX * IszY*Nfr;
     /*get object centers*/
@@ -432,7 +436,7 @@ int findIndex(double * CDF, int lengthCDF, double value) {
  * @param countOnes
 *
 */
-static int allocate(int Nparticles, int countOnes){
+static int allocate(int Nparticles, int countOnes, int platform_id, int device_id, int use_gpu){
 	/***** variables ******/
 
 	int sourcesize = 1024 * 1024;
@@ -453,8 +457,7 @@ static int allocate(int Nparticles, int countOnes){
 	fclose(fp);
 
 	// OpenCL initialization
-	int use_gpu = 0;
-	if (initialize(use_gpu)) return -1;
+	if (initialize(platform_id, device_id, use_gpu)) return -1;
 
 	// compile kernel
 	cl_int err = 0;
@@ -552,7 +555,7 @@ static int allocate(int Nparticles, int countOnes){
  * @param seed The seed array used for random number generation
  * @param Nparticles The number of particles to be used
  */
-int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles) {
+int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles, int platform_id, int device_id, int use_gpu) {
 	int max_size = IszX * IszY*Nfr;
 	long long start = get_time();
 	//original particle centroid
@@ -587,7 +590,7 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
 	//initial likelihood to 0.0
 
 	//allocate all of the memory for the computation 
-	allocate(Nparticles, countOnes);
+	allocate(Nparticles, countOnes, platform_id, device_id, use_gpu);
 	
 	for (x = 0; x < Nparticles; x++) {
 		arrayX[x] = xe;
@@ -772,19 +775,25 @@ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Npartic
 
 int main(int argc, char * argv[]) {
 
-    char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles>";
+    char* usage = "naive.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles> -p <platform_id> -d <device_id> -g <use_gpu>";
     //check number of arguments
-    if (argc != 9) {
+    if (argc != 15) {
         printf("%s\n", usage);
         return 0;
     }
     //check args deliminators
-    if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")) {
+    if (strcmp(argv[1], "-x") || strcmp(argv[3], "-y") || strcmp(argv[5], "-z") || strcmp(argv[7], "-np")
+        || strcmp(argv[9], "-p") || strcmp(argv[11], "-d") || strcmp(argv[13], "-g")) {
         printf("%s\n", usage);
         return 0;
     }
 
     int IszX, IszY, Nfr, Nparticles;
+    
+    // Additional variables for platform and device selection
+    int platform_id = 0;
+    int device_id = 0;
+    int use_gpu = 0;
 
     //converting a string to a integer
     if (sscanf(argv[2], "%d", &IszX) == EOF) {
@@ -829,6 +838,22 @@ int main(int argc, char * argv[]) {
         printf("Number of particles must be > 0\n");
         return 0;
     }
+    
+    if (sscanf(argv[10], "%d", &platform_id) == EOF) {
+        printf("ERROR: platform_id is incorrect");
+        return 0;
+    }
+    
+    if (sscanf(argv[12], "%d", &device_id) == EOF) {
+        printf("ERROR: device_id is incorrect");
+        return 0;
+    }
+    
+    if (sscanf(argv[14], "%d", &use_gpu) == EOF) {
+        printf("ERROR: use_gpu is incorrect, r, 1 for GPU and 0 for CPU");
+        return 0;
+    }
+    
     //establish seed
     int * seed = (int *) malloc(sizeof (int) *Nparticles);
     int i;
@@ -838,11 +863,11 @@ int main(int argc, char * argv[]) {
     int * I = (int *) malloc(sizeof (int) *IszX * IszY * Nfr);
     long long start = get_time();
     //call video sequence
-    videoSequence(I, IszX, IszY, Nfr, seed);
+    videoSequence(I, IszX, IszY, Nfr, seed, platform_id, device_id, use_gpu);
     long long endVideoSequence = get_time();
     printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence));
     //call particle filter
-    particleFilter(I, IszX, IszY, Nfr, seed, Nparticles);
+    particleFilter(I, IszX, IszY, Nfr, seed, Nparticles, platform_id, device_id, use_gpu);
     long long endParticleFilter = get_time();
     printf("PARTICLE FILTER TOOK %f\n", elapsed_time(endVideoSequence, endParticleFilter));
     printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter));

+ 1 - 1
opencl/particlefilter/run-cpu

@@ -1 +1 @@
-./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000
+./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000 -p 1 -d 0 -g 0

+ 1 - 0
opencl/particlefilter/run-gpu

@@ -0,0 +1 @@
+./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 100000 -p 0 -d 0 -g 1