RCCE_pstencil.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. //
  2. // Copyright 2010 Intel Corporation
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  #include "RCCE.h"

  #include <errno.h>
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>

  /* Not defined in this part of the file; declared here so it can be called. */
  void print_dividers(void);
  /* hardwired predefined constants */

  /* Grid width (NX) and the second strip dimension (NY) used to size a[]. */
  #define NX 16
  #define NY 25

  /* Element counts: NX*NY, NX*(NY-1) and NX*(NY-2). */
  #define NXNY  ((NX)*(NY))
  #define NXNY1 ((NX)*((NY)-1))
  #define NXNY2 ((NX)*((NY)-2))

  /*
   * Index offsets added to the loop index in the stencil update; O3 is the
   * offset of the element being updated.  Every expansion is fully
   * parenthesized so the macros stay correct inside larger expressions
   * (e.g. 2*O2 or x % O5).
   */
  #define O1 0
  #define O2 ((NX)-1)
  #define O3 (NX)
  #define O4 ((NX)+1)
  #define O5 (2*(NX))

  /* Stencil weights paired with O1..O5; the five weights sum to zero. */
  #define W1 0.25
  #define W2 0.25
  #define W4 0.25
  #define W5 0.25
  #define W3 (-1.0)
  35. /* initialization;
  36. resulting 2D data set represented by a[] is as follows, where
  37. first and last row of each strip are fixed boundary values (1's
  38. and 2's) or fringe data copied from strips on neighboring tiles.
  39. 1 1 1 1 1 1 1 1 1 1
  40. 0 0 0 0 0 0 0 0 0 0
  41. ................... CORE 0
  42. 0 0 0 0 0 0 0 0 0 0
  43. 0 0 0 0 0 0 0 0 0 0
  44. 0 0 0 0 0 0 0 0 0 0
  45. 0 0 0 0 0 0 0 0 0 0
  46. ................... CORE 1
  47. 0 0 0 0 0 0 0 0 0 0
  48. 0 0 0 0 0 0 0 0 0 0
  49. 0 0 0 0 0 0 0 0 0 0
  50. 0 0 0 0 0 0 0 0 0 0
  51. ................... CORE 2
  52. 0 0 0 0 0 0 0 0 0 0
  53. 0 0 0 0 0 0 0 0 0 0
  54. 0 0 0 0 0 0 0 0 0 0
  55. 0 0 0 0 0 0 0 0 0 0
  56. ................... CORE NTILES-1
  57. 0 0 0 0 0 0 0 0 0 0
  58. 2 2 2 2 2 2 2 2 2 2
  59. */
  60. int RCCE_APP(int argc, char **argv){
  61. float a[NXNY];
  62. int i, offset, iter=3;
  63. int fdiv, vlevel;
  64. int ID, ID_right, ID_left;
  65. int NTILES1;
  66. double time;
  67. RCCE_REQUEST req;
  68. RCCE_init(&argc, &argv);
  69. // RCCE_debug_set(RCCE_DEBUG_ALL);
  70. NTILES1 = RCCE_num_ues()-1;
  71. ID = RCCE_ue();
  72. ID_right = (ID+1)%RCCE_num_ues();
  73. ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();
  74. // set the relevant areas of the board to the default frequency and voltage
  75. RCCE_set_frequency_divider(8, &fdiv);
  76. if (ID==0)print_dividers();
  77. // return(0);
  78. // RCCE_iset_power(3, &req, &fdiv, &vlevel);
  79. // if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  80. // RCCE_wait_power(&req);
  81. // RCCE_set_frequency_divider(3, &fdiv);
  82. if (NX%8) {
  83. printf("Grid width should be multiple of 8: %d\n", NX);
  84. exit(1);
  85. }
  86. if (argc>1) iter=atoi(*++argv);
  87. if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);
  88. /* initialize array a on all tiles; this stuffs a into private caches */
  89. for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0;
  90. if (ID == 0)
  91. for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0;
  92. if (ID == NTILES1)
  93. for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0;
  94. /* main loop */
  95. if (ID==0) time = RCCE_wtime();
  96. while ((iter--)>0){
  97. RCCE_iset_power(3, &req, &fdiv, &vlevel);
  98. if (ID==RCCE_power_domain_master())
  99. printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
  100. fflush(NULL);
  101. if (!(iter%100)) printf("Iteration %d\n", iter);
  102. /* start with copying fringe data to neighboring tiles; we need to
  103. group semantic send/recv pairs together to avoid deadlock */
  104. if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
  105. if (ID != 0) RCCE_recv((char*)(&a[0]), NX*sizeof(float), ID_left);
  106. RCCE_wait_power(&req);
  107. if (ID!=0) RCCE_send((char *)(&a[NX]), NX*sizeof(float), ID_left);
  108. if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);
  109. RCCE_iset_power(3, &req, &fdiv, &vlevel);
  110. RCCE_set_frequency_divider(3, &fdiv);
  111. if (ID==RCCE_power_domain_master())
  112. printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
  113. fflush(NULL);
  114. /* apply the stencil operation */
  115. for (i=0; i<NXNY2; i++) {
  116. a[i+O3] +=
  117. W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
  118. }
  119. RCCE_wait_power(&req);
  120. }
  121. // /* print result strip by strip; this would not be done on RC */
  122. // for (int id=0; id<=NTILES1; id++) {
  123. // RCCE_barrier(&RCCE_COMM_WORLD);
  124. // if (ID==id) {
  125. // int start = NX; int end = NXNY1;
  126. // if (ID==0) start = 0;
  127. // if (ID == NTILES1) end = NXNY;
  128. // for (offset=0, i=start; i<end; i++) {
  129. // if (!(i%NX)) printf("\n");
  130. //// comment out next line and uncomment subsequent three to print error
  131. // printf("%1.5f ",a[i+offset]); fflush(stdout);
  132. //// int jj=i/NX+(ID*(NY-1));
  133. //// double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
  134. //// printf("%f ",a[i+offset]-aexact);
  135. // }
  136. // }
  137. // }
  138. // RCCE_barrier(&RCCE_COMM_WORLD);
  139. // if (ID==0) {
  140. // printf("\n");
  141. // time = RCCE_wtime()-time;
  142. // printf("Total time: %lf\n", time);
  143. // }
  144. //reset the relevant areas of the board to the default frequency and voltage
  145. // RCCE_set_frequency_divider(8, &fdiv);
  146. // RCCE_iset_power(2, &req, &fdiv, &vlevel);
  147. // if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  148. // RCCE_wait_power(&req);
  149. // RCCE_set_frequency_divider(3, &fdiv);
  150. RCCE_barrier(&RCCE_COMM_WORLD);
  151. if (ID==0)print_dividers();
  152. RCCE_finalize();
  153. return(0);
  154. }