RCCE_stencil_synch.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. //
  2. // Copyright 2010 Intel Corporation
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  #include "RCCE.h"

  #include <errno.h>
  #include <limits.h>
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>

  long long RC_global_clock();
  /* hardwired predefined constants */

  /* Grid dimensions of the strip of data held by one core: NX columns, NY rows. */
  #define NX 1024
  #define NY 1024
  /* Element counts: the whole strip, all but the last row, all but the last two rows. */
  #define NXNY ((NX)*(NY))
  #define NXNY1 ((NX)*((NY)-1))
  #define NXNY2 ((NX)*((NY)-2))

  /*
   * Stencil offsets, added to the running index i of the stencil loop.
   * The updated element is a[i+O3]; O1 and O5 are the elements one row above
   * and one row below it, O2 and O4 the elements immediately before and after
   * it in memory.  Every expansion is fully parenthesized so the macros stay
   * correct when used inside larger expressions (e.g. 2*O2).
   */
  #define O1 0
  #define O2 ((NX)-1)
  #define O3 (NX)
  #define O4 ((NX)+1)
  #define O5 (2*(NX))

  /*
   * Stencil weights.  W3 is applied to the element being updated; because the
   * stencil loop accumulates with "+=", W3 == -1.0 cancels the old value and
   * leaves the weighted sum of the four neighbors.
   */
  #define W1 0.25
  #define W2 0.25
  #define W4 0.25
  #define W5 0.25
  #define W3 (-1.0)

  /*
   * Absolute value of x.  The argument is parenthesized in the negated branch
   * as well, so FABS(a - b) yields -(a - b) rather than (-a - b).
   * NOTE: x is evaluated more than once; do not pass expressions with side
   * effects.
   */
  #define FABS(x) ((x)>0?(x):(-(x)))
  37. /* initialization;
  38. resulting 2D data set represented by a[] is as follows, where
  39. first and last row of each strip are fixed boundary values (1's
  40. and 2's) or fringe data copied from strips on neighboring tiles.
  41. 1 1 1 1 1 1 1 1 1 1
  42. 0 0 0 0 0 0 0 0 0 0
  43. ................... CORE 0
  44. 0 0 0 0 0 0 0 0 0 0
  45. 0 0 0 0 0 0 0 0 0 0
  46. 0 0 0 0 0 0 0 0 0 0
  47. 0 0 0 0 0 0 0 0 0 0
  48. ................... CORE 1
  49. 0 0 0 0 0 0 0 0 0 0
  50. 0 0 0 0 0 0 0 0 0 0
  51. 0 0 0 0 0 0 0 0 0 0
  52. 0 0 0 0 0 0 0 0 0 0
  53. ................... CORE 2
  54. 0 0 0 0 0 0 0 0 0 0
  55. 0 0 0 0 0 0 0 0 0 0
  56. 0 0 0 0 0 0 0 0 0 0
  57. 0 0 0 0 0 0 0 0 0 0
  58. ................... CORE NTILES-1
  59. 0 0 0 0 0 0 0 0 0 0
  60. 2 2 2 2 2 2 2 2 2 2
  61. */
  62. int RCCE_APP(int argc, char **argv){
  63. float a[NXNY], checksum, vchecksum, error;
  64. int i, offset, iter=10, itermax, id;
  65. int ID, ID_right, ID_left;
  66. int NTILES1;
  67. double time;
  68. char *result;
  69. RCCE_init(&argc, &argv);
  70. NTILES1 = RCCE_num_ues()-1;
  71. ID = RCCE_ue();
  72. printf("My UE is %d\n", ID);
  73. ID_right = (ID+1)%RCCE_num_ues();
  74. ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();
  75. if (NX%8) {
  76. printf("Grid width should be multiple of 8: %d\n", NX);
  77. exit(1);
  78. }
  79. if (argc>1) iter=atoi(*++argv);
  80. if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);
  81. itermax = iter;
  82. /* initialize array a on all tiles; this stuffs a into private caches */
  83. for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0;
  84. if (ID == 0)
  85. for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0;
  86. if (ID == NTILES1)
  87. for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0;
  88. /* main loop */
  89. RCCE_barrier(&RCCE_COMM_WORLD);
  90. time = RCCE_wtime();
  91. while ((iter--)>0){
  92. /* start with copying fringe data to neighboring tiles; we need to
  93. group semantic send/recv pairs together to avoid deadlock */
  94. if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
  95. if (ID != 0) RCCE_recv((char*)(&a[0]), NX*sizeof(float), ID_left);
  96. if (ID!=0) RCCE_send((char *)(&a[NX]), NX*sizeof(float), ID_left);
  97. if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);
  98. /* apply the stencil operation */
  99. for (i=0; i<NXNY2; i++) {
  100. a[i+O3] +=
  101. W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
  102. }
  103. }
  104. RCCE_barrier(&RCCE_COMM_WORLD);
  105. time = RCCE_wtime()-time;
  106. /* print result strip by strip; avoid output mangling by letting one core print */
  107. checksum = 0.0;
  108. if (ID==0) {
  109. for (id=0; id<=NTILES1; id++) {
  110. if (id!=ID) RCCE_recv((char *)a, NXNY*sizeof(float), id);
  111. int start = NX; int end = NXNY1;
  112. if (id==0) start = 0;
  113. if (id == NTILES1) end = NXNY;
  114. for (offset=0, i=start; i<end; i++) {
  115. //if (!(i%NX)) {printf("\n");}
  116. //printf("%1.4f ",a[i+offset]);
  117. checksum += (a[i+offset]*a[i+offset]);
  118. }
  119. }
  120. checksum = sqrt(checksum/NXNY);
  121. }
  122. else RCCE_send((char *)a, NXNY*sizeof(float), 0);
  123. if (ID==0) {
  124. printf("\n");
  125. printf("Total time: %lf\n", time); fflush(NULL);
  126. printf("Checksum = %lf", checksum);
  127. if (NTILES1==3 && itermax == 10) {
  128. vchecksum = 0.635851;
  129. error = FABS(vchecksum - checksum);
  130. if (error < 1.e-5) result = "SUCCESSFUL";
  131. else result = "FAILED";
  132. printf(" Verification value = %lf, error = %lf; run %s",
  133. vchecksum, error, result);
  134. }
  135. if (NTILES1==15 && itermax == 100) {
  136. vchecksum = 1.019079;
  137. error = FABS(vchecksum - checksum);
  138. if (error < 1.e-5) result = "SUCCESSFUL";
  139. else result = "FAILED";
  140. printf(" Verification value = %lf, error = %lf; run %s",
  141. vchecksum, error, result);
  142. }
  143. if (NTILES1==47 && itermax == 1000) {
  144. vchecksum = 1.741555;
  145. error = FABS(vchecksum - checksum);
  146. if (error < 1.e-5) result = "SUCCESSFUL";
  147. else result = "FAILED";
  148. printf(" Verification value = %lf, error = %lf; run %s",
  149. vchecksum, error, result);
  150. }
  151. }
  152. RCCE_finalize();
  153. return(0);
  154. }