RCCE_heat_synch.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. //
  2. // Copyright 2010 Intel Corporation
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  #include "RCCE.h"
  #include <errno.h>
  #include <limits.h>
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  long long RC_global_clock();
  20. /* hardwired grid dimensions: U[k] is allocated as NX pointers to NY floats each */
  21. #define NX 256 /* extent of the first grid index (i) */
  22. #define NY 256 /* extent of the second grid index (j); this range is divided among the UEs */
  23. #define NXNY ((NX)*(NY)) /* total number of grid points */
  24. #define NXNY1 ((NX)*(NY-1)) /* NX * (NY-1); only referenced from commented-out code in this file */
  25. #define NXNY2 ((NX)*(NY-2)) /* NX * (NY-2); not referenced in the visible code */
  26. /*
  27. #define O1 0
  28. #define O2 NX-1
  29. #define O3 NX
  30. #define O4 NX+1
  31. #define O5 2*(NX)
  32. #define W1 0.25
  33. #define W2 0.25
  34. #define W4 0.25
  35. #define W5 0.25
  36. #define W3 -1.0
  37. #define FABS(x) ((x)>0?(x):(-x))
  38. */
  39. /*
  40. const int scc2grid_mapping[48] = {0, 6, 1, 7, 2, 8,
  41. 3, 9, 4, 10, 5, 11,
  42. 12, 18, 13, 19, 14, 20,
  43. 15, 21, 16, 22, 17, 23,
  44. 24, 30, 25, 31, 26, 32,
  45. 27, 33, 28, 34, 29, 35,
  46. 36, 42, 37, 43, 38, 44,
  47. 39, 45, 40, 46, 41, 47};
  48. const int grid2scc_mapping[48] = {0, 2, 4, 6, 8, 10,
  49. 1, 3, 5, 7, 9, 11,
  50. 12, 14, 16, 18, 20, 22,
  51. 13, 15, 17, 19, 21, 23,
  52. 24, 26, 28, 30, 32, 34,
  53. 25, 27, 29, 31, 33, 35,
  54. 36, 38, 40, 42, 44, 46,
  55. 37, 39, 41, 43, 45, 47};
  56. */
  57. int RCCE_APP(int argc, char **argv){
  58. float ***U, buffer_in[NX], buffer_out[NX];
  59. int i, j, iter, itermax=10;
  60. int ID, ID_right, ID_left, y_start, y_fin; //, ID_up, ID_down;
  61. int NTILES1;
  62. double time;
  63. //char *result;
  64. RCCE_init(&argc, &argv);
  65. NTILES1 = RCCE_num_ues();
  66. if ((NTILES1>1) && (NTILES1%2)) {
  67. printf("Grid width should be multiple of 2: %d\n", NTILES1);
  68. exit(1);
  69. }
  70. ID = RCCE_ue();
  71. printf("My UE is %d\n", ID);
  72. ID_right = (ID+1)%RCCE_num_ues(); /* As if all PEs lie horizontally */
  73. ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();
  74. y_start = ID * (NY / NTILES1);
  75. y_fin = ((ID+1) * (NY / NTILES1)) - 1;
  76. /*
  77. if (NTILES1 <= 12) {
  78. CPUX = 2;
  79. CPUY = NTILES1 / CPUX;
  80. } else {
  81. printf("Ntiles greater than 12\n");
  82. }
  83. // Observation 1: In scc laytou every + 2 for right will be either in or out w.r.t. NTILES1
  84. if ((ID+2) >= NTILES1) {
  85. ID_right = -1;
  86. } else {
  87. ID_right = ID+2;
  88. }
  89. if ((ID-2) >= 0) {
  90. ID_left = -1;
  91. } else {
  92. ID_left = ID-2;
  93. }
  94. if ((ID%2) == 0) { // Down row -- this applies only up to 16
  95. ID_up = ID + 1;
  96. ID_down = -1;
  97. } else {
  98. ID_down = ID - 1;
  99. ID_up = -1;
  100. }
  101. printf("Core %d ID_right %d ID_left %d ID_up %d ID_down %d\n", ID, ID_right, ID_left, ID_up, ID_down);
  102. */
  103. if (argc>1) itermax=atoi(*++argv);
  104. //if (!ID) printf("Core %d Executing %d iterations\n", ID, itermax);
  105. /* initialize array a on all tiles; this stuffs a into private caches */
  106. U = (float ***) malloc(2 * sizeof(float **));
  107. U[0] = (float **) malloc(NX * sizeof(float *));
  108. U[1] = (float **) malloc(NX * sizeof(float *));
  109. for (i=0; i<NX; i++) {
  110. U[0][i] = (float *) malloc(NY * sizeof(float));
  111. U[1][i] = (float *) malloc(NY * sizeof(float));
  112. for (j=0; j<NY; j++) {
  113. U[0][i][j] = 4.0;
  114. }
  115. }
  116. //if (!ID) printf("Core %d Executing %d iterations\n", ID, itermax);
  117. /* main loop */
  118. RCCE_barrier(&RCCE_COMM_WORLD);
  119. time = RCCE_wtime();
  120. for (iter=0; iter<itermax; iter++) {
  121. for (i = 1; i < NX - 1; i ++)
  122. for (j = y_start; j < y_fin; j ++) {
  123. U[(iter+1)%2][i][j] = (1/4)*(U[iter%2][i-1][j]+U[iter%2][i][j-1] +U[iter%2][i+1][j]+U[iter%2][i][j+1]);
  124. }
  125. /* start with copying fringe data to neighboring tiles; we need to
  126. group semantic send/recv pairs together to avoid deadlock */
  127. if (NTILES1 > 1) {
  128. /* send to right */
  129. for (i=0; i<NX; i++) {
  130. buffer_out[i] = U[(iter+1)%2][i][y_fin];
  131. }
  132. if (ID_right!=0) RCCE_send((char*)(&buffer_out[0]), NX*sizeof(float), ID_right);
  133. if (ID != 0) {
  134. RCCE_recv((char*)(&buffer_in[0]), NX*sizeof(float), ID_left);
  135. for (i=0; i<NX; i++) {
  136. U[(iter+1)%2][i][y_start-1] = buffer_in[i];
  137. }
  138. }
  139. /* send to left */
  140. for (i=0; i<NX; i++) {
  141. buffer_out[i] = U[(iter+1)%2][i][y_start];
  142. }
  143. if (ID!=0) RCCE_send((char *)(&buffer_out[0]), NX*sizeof(float), ID_left);
  144. if (ID_right!=0) {
  145. RCCE_recv((char *)(&buffer_in[0]), NX*sizeof(float), ID_right);
  146. for (i=0; i<NX; i++) {
  147. U[(iter+1)%2][i][y_fin+1] = buffer_in[i];
  148. }
  149. }
  150. }
  151. }
  152. RCCE_barrier(&RCCE_COMM_WORLD);
  153. time = RCCE_wtime()-time;
  154. /* print result strip by strip; avoid output mangling by letting one core print */
  155. /*
  156. checksum = 0.0;
  157. if (ID==0) {
  158. for (id=0; id<=NTILES1; id++) {
  159. if (id!=ID) RCCE_recv((char *)a, NXNY*sizeof(float), id);
  160. int start = NX; int end = NXNY1;
  161. if (id==0) start = 0;
  162. if (id == NTILES1) end = NXNY;
  163. for (offset=0, i=start; i<end; i++) {
  164. if (!(i%NX)) {printf("\n");}
  165. printf("%1.4f ",a[i+offset]);
  166. checksum += (a[i+offset]*a[i+offset]);
  167. }
  168. }
  169. checksum = sqrt(checksum/NXNY);
  170. }
  171. else RCCE_send((char *)a, NXNY*sizeof(float), 0);
  172. */
  173. if (ID==0) {
  174. printf("\nTotal time: %lf\n", time);
  175. fflush(NULL);
  176. //printf("Checksum = %lf", checksum);
  177. /*
  178. if (NTILES1==3 && itermax == 10) {
  179. vchecksum = 0.635851;
  180. error = FABS(vchecksum - checksum);
  181. if (error < 1.e-5) result = "SUCCESSFUL";
  182. else result = "FAILED";
  183. printf(" Verification value = %lf, error = %lf; run %s",
  184. vchecksum, error, result);
  185. }
  186. if (NTILES1==15 && itermax == 100) {
  187. vchecksum = 1.019079;
  188. error = FABS(vchecksum - checksum);
  189. if (error < 1.e-5) result = "SUCCESSFUL";
  190. else result = "FAILED";
  191. printf(" Verification value = %lf, error = %lf; run %s",
  192. vchecksum, error, result);
  193. }
  194. if (NTILES1==47 && itermax == 1000) {
  195. vchecksum = 1.741555;
  196. error = FABS(vchecksum - checksum);
  197. if (error < 1.e-5) result = "SUCCESSFUL";
  198. else result = "FAILED";
  199. printf(" Verification value = %lf, error = %lf; run %s",
  200. vchecksum, error, result);
  201. }
  202. */
  203. }
  204. RCCE_finalize();
  205. return(0);
  206. }