/* Kernels.cl */
/* ============================================================
//--cambine: kernel function of pgain
//--author: created by Jianbin Fang
//--date: 02/03/2011
============================================================ */
  6. typedef struct {
  7. float weight;
  8. //float *coord;
  9. long assign; /* number of point where this one is assigned */
  10. float cost; /* cost of that assignment, weight*distance */
  11. } Point_Struct;
  12. //#define Elements
  13. __kernel void memset_kernel(__global char * mem_d, short val, int number_bytes){
  14. const int thread_id = get_global_id(0);
  15. mem_d[thread_id] = val;
  16. }
//--10 parameters
/* kernel */
  19. __kernel void pgain_kernel(
  20. __global Point_Struct *p,
  21. __global float *coord_d,
  22. __global float * work_mem_d,
  23. __global int *center_table_d,
  24. __global char *switch_membership_d,
  25. __local float *coord_s,
  26. int num,
  27. int dim,
  28. long x,
  29. int K){
  30. /* block ID and global thread ID */
  31. const int thread_id = get_global_id(0);
  32. const int local_id = get_local_id(0);
  33. if(thread_id<num){
  34. // coordinate mapping of point[x] to shared mem
  35. if(local_id == 0)
  36. for(int i=0; i<dim; i++){
  37. coord_s[i] = coord_d[i*num + x];
  38. }
  39. barrier(CLK_LOCAL_MEM_FENCE);
  40. // cost between this point and point[x]: euclidean distance multiplied by weight
  41. float x_cost = 0.0;
  42. for(int i=0; i<dim; i++)
  43. x_cost += (coord_d[(i*num)+thread_id]-coord_s[i]) * (coord_d[(i*num)+thread_id]-coord_s[i]);
  44. x_cost = x_cost * p[thread_id].weight;
  45. float current_cost = p[thread_id].cost;
  46. int base = thread_id*(K+1);
  47. // if computed cost is less then original (it saves), mark it as to reassign
  48. if ( x_cost < current_cost ){
  49. switch_membership_d[thread_id] = '1';
  50. int addr_1 = base + K;
  51. work_mem_d[addr_1] = x_cost - current_cost;
  52. }
  53. // if computed cost is larger, save the difference
  54. else {
  55. int assign = p[thread_id].assign;
  56. int addr_2 = base + center_table_d[assign];
  57. work_mem_d[addr_2] += current_cost - x_cost;
  58. }
  59. }
  60. }