RCCE_stencil_synch.c-orig.svn-base 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. //
  2. // Copyright 2010 Intel Corporation
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  #include "RCCE.h"
  long long RC_global_clock();
  #include <errno.h>
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>
  /* Hardwired predefined constants.
   *
   * Each core owns one strip of the grid, stored row-major in a flat array
   * of NX columns by NY rows.  Every macro expands to a fully parenthesized
   * expression so that it can be used safely inside larger expressions. */
  #define NX 16                      /* grid width (columns per row)       */
  #define NY 25                      /* rows per strip                     */
  #define NXNY  ((NX)*(NY))          /* total number of elements per strip */
  #define NXNY1 ((NX)*((NY)-1))      /* index of first element of last row */
  #define NXNY2 ((NX)*((NY)-2))      /* index of first element of the row
                                        before the last                    */

  /* Flat-array offsets of the five stencil points relative to loop index i.
   * The point being updated sits at i+O3, i.e. one row below index i. */
  #define O1 0                       /* row above the updated point        */
  #define O2 ((NX)-1)                /* element just before the point      */
  #define O3 (NX)                    /* the updated point itself           */
  #define O4 ((NX)+1)                /* element just after the point       */
  #define O5 (2*(NX))                /* row below the updated point        */

  /* Stencil weights, indexed like the offsets above.  The four neighbours
   * contribute 0.25 each and the centre contributes -1.0 to the increment. */
  #define W1 0.25
  #define W2 0.25
  #define W4 0.25
  #define W5 0.25
  #define W3 (-1.0)
  35. /* initialization;
  36. resulting 2D data set represented by a[] is as follows, where
  37. first and last row of each strip are fixed boundary values (1's
  38. and 2's) or fringe data copied from strips on neighboring tiles.
  39. 1 1 1 1 1 1 1 1 1 1
  40. 0 0 0 0 0 0 0 0 0 0
  41. ................... CORE 0
  42. 0 0 0 0 0 0 0 0 0 0
  43. 0 0 0 0 0 0 0 0 0 0
  44. 0 0 0 0 0 0 0 0 0 0
  45. 0 0 0 0 0 0 0 0 0 0
  46. ................... CORE 1
  47. 0 0 0 0 0 0 0 0 0 0
  48. 0 0 0 0 0 0 0 0 0 0
  49. 0 0 0 0 0 0 0 0 0 0
  50. 0 0 0 0 0 0 0 0 0 0
  51. ................... CORE 2
  52. 0 0 0 0 0 0 0 0 0 0
  53. 0 0 0 0 0 0 0 0 0 0
  54. 0 0 0 0 0 0 0 0 0 0
  55. 0 0 0 0 0 0 0 0 0 0
  56. ................... CORE NTILES-1
  57. 0 0 0 0 0 0 0 0 0 0
  58. 2 2 2 2 2 2 2 2 2 2
  59. */
  60. int RCCE_APP(int argc, char **argv){
  61. float a[NXNY];
  62. int i, offset, iter=10;
  63. int ID, ID_right, ID_left;
  64. int NTILES1;
  65. double time;
  66. RCCE_init(&argc, &argv);
  67. NTILES1 = RCCE_num_ues()-1;
  68. ID = RCCE_ue();
  69. printf("My UE is %d\n", ID);
  70. ID_right = (ID+1)%RCCE_num_ues();
  71. ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();
  72. if (NX%8) {
  73. printf("Grid width should be multiple of 8: %d\n", NX);
  74. exit(1);
  75. }
  76. if (argc>1) iter=atoi(*++argv);
  77. if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);
  78. /* initialize array a on all tiles; this stuffs a into private caches */
  79. for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0;
  80. if (ID == 0)
  81. for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0;
  82. if (ID == NTILES1)
  83. for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0;
  84. /* main loop */
  85. RCCE_barrier(&RCCE_COMM_WORLD);
  86. time = RCCE_wtime();
  87. while ((iter--)>0){
  88. /* start with copying fringe data to neighboring tiles; we need to
  89. group semantic send/recv pairs together to avoid deadlock */
  90. if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
  91. if (ID != 0) RCCE_recv((char*)(&a[0]), NX*sizeof(float), ID_left);
  92. if (ID!=0) RCCE_send((char *)(&a[NX]), NX*sizeof(float), ID_left);
  93. if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);
  94. /* apply the stencil operation */
  95. for (i=0; i<NXNY2; i++) {
  96. a[i+O3] +=
  97. W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
  98. }
  99. }
  100. RCCE_barrier(&RCCE_COMM_WORLD);
  101. time = RCCE_wtime()-time;
  102. /* print result strip by strip */
  103. for (int id=0; id<=NTILES1; id++) {
  104. RCCE_barrier(&RCCE_COMM_WORLD);
  105. if (ID==id) {
  106. int start = NX; int end = NXNY1;
  107. if (ID==0) start = 0;
  108. if (ID == NTILES1) end = NXNY;
  109. for (offset=0, i=start; i<end; i++) {
  110. if (!(i%NX)) printf("\n");
  111. // comment out next line and uncomment subsequent three to print error
  112. printf("%1.5f ",a[i+offset]); fflush(stdout);
  113. // int jj=i/NX+(ID*(NY-1));
  114. // double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
  115. // printf("%f ",a[i+offset]-aexact);
  116. }
  117. }
  118. }
  119. RCCE_barrier(&RCCE_COMM_WORLD);
  120. if (ID==0) {
  121. printf("\n");
  122. printf("Total time: %lf\n", time);
  123. }
  124. RCCE_finalize();
  125. return(0);
  126. }