RCCE_stencil.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. //
  2. // Copyright 2010 Intel Corporation
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
#include "RCCE.h"
#include <stdio.h>
#include <stdlib.h>

long long RC_global_clock();
  19. /* hardwired predefined constants */
  20. #define NX 1024
  21. #define NY 1024
  22. #define NXNY ((NX)*(NY))
  23. #define NXNY1 ((NX)*(NY-1))
  24. #define NXNY2 ((NX)*(NY-2))
  25. #define O1 0
  26. #define O2 NX-1
  27. #define O3 NX
  28. #define O4 NX+1
  29. #define O5 2*(NX)
  30. #define W1 0.25
  31. #define W2 0.25
  32. #define W4 0.25
  33. #define W5 0.25
  34. #define W3 -1.0
/* Initialization:
   the resulting 2D data set represented by a[] is laid out as shown below.
   The first and last row of each strip hold either fixed boundary values
   (the 1's and 2's) or fringe data copied from the strips on neighboring
   tiles.
   1 1 1 1 1 1 1 1 1 1
   0 0 0 0 0 0 0 0 0 0
   ................... CORE 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   ................... CORE 1
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   ................... CORE 2
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0
   ................... CORE NTILES-1
   0 0 0 0 0 0 0 0 0 0
   2 2 2 2 2 2 2 2 2 2
*/
  60. int RCCE_APP(int argc, char **argv) {
  61. /* statically allocated space sits in off-chip private memory */
  62. float a[NXNY], *buff;
  63. int i, offset, iter=10, tile;
  64. int MY_ID;
  65. int NTILES1;
  66. double time;
  67. RCCE_FLAG flag0, flag1;
  68. RCCE_init(&argc, &argv);
  69. NTILES1 = RCCE_num_ues()-1;
  70. MY_ID = RCCE_ue();
  71. if (NX%8) {
  72. printf("Grid width should be multiple of 8: %d\n", NX);
  73. exit(1);
  74. }
  75. if (argc>1) iter=atoi(*++argv);
  76. if (MY_ID==0) printf("Executing %d iterations\n", iter);
  77. /* allocate space on the comm buffer */
  78. buff = (float *) RCCE_malloc(sizeof(float)*2*NX);
  79. /* Allocate flags to coordinate comm. */
  80. if (RCCE_flag_alloc(&flag0)) return(1);
  81. if (RCCE_flag_alloc(&flag1)) return(1);
  82. /* initialize array a on all tiles; this stuffs a into private caches */
  83. for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0;
  84. if (MY_ID == 0)
  85. for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0;
  86. if (MY_ID == NTILES1)
  87. for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0;
  88. /* put in a barrier so everybody can be sure to have initialized */
  89. RCCE_barrier(&RCCE_COMM_WORLD);
  90. /* main loop */
  91. if (MY_ID==0) time = RCCE_wtime();
  92. while ((iter--)>0){
  93. /* start with copying fringe data to neighboring tiles */
  94. if (MY_ID!=NTILES1) {
  95. /* Initialize neighbor flag to zero */
  96. RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1);
  97. /* copy private data to shared comm buffer of neighbor */
  98. RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1);
  99. RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1);
  100. }
  101. if (MY_ID != 0) {
  102. /* Initialize neighbor flag to zero */
  103. RCCE_flag_write(&flag1, 0, MY_ID-1);
  104. /* copy private data to shared comm buffer of neighbor */
  105. RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1);
  106. RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1);
  107. }
  108. /* Make sure the data has been recvd and copy data out of buffer(s) */
  109. if (MY_ID!=NTILES1) {
  110. RCCE_wait_until(flag1, RCCE_FLAG_SET);
  111. RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID);
  112. }
  113. if (MY_ID!=0) {
  114. RCCE_wait_until(flag0, RCCE_FLAG_SET);
  115. RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID);
  116. }
  117. /* apply the stencil operation */
  118. for (i=0; i<(NXNY2/NTILES1); i++) {
  119. a[i+O3] +=
  120. W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
  121. }
  122. }
  123. RCCE_barrier(&RCCE_COMM_WORLD);
  124. if (MY_ID==0) {
  125. time = RCCE_wtime()-time;
  126. }
  127. /* print result strip by strip; this would not be done on RC */
  128. for (int id=0; id<=NTILES1; id++) {
  129. RCCE_barrier(&RCCE_COMM_WORLD);
  130. if (MY_ID==id) {
  131. int start = NX; int end = NXNY1;
  132. if (MY_ID==0) start = 0;
  133. if (MY_ID == NTILES1) end = NXNY;
  134. //for (offset=0, i=start; i<end; i++) {
  135. // if (!(i%NX)) printf("\n");
  136. // comment out next line and uncomment subsequent three to print error
  137. // printf("%f ",a[i+offset]);
  138. // int jj=i/NX+(MY_ID*(NY-1));
  139. // double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
  140. // printf("%f ",a[i+offset]-aexact);
  141. //}
  142. }
  143. }
  144. RCCE_barrier(&RCCE_COMM_WORLD);
  145. if (MY_ID==0) {
  146. printf("\nTotal time: %lf\n", time);
  147. }
  148. RCCE_finalize();
  149. return(0);
  150. }