bandwidth.c 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdio.h>
  17. #include <unistd.h>
  18. #include <starpu.h>
  19. #include "../helper.h"
  20. /*
  21. * Measure the memory bandwidth available to kernels depending on the number of
  22. * kernels and number of idle workers.
  23. */
  24. #ifdef STARPU_QUICK_CHECK
  25. static size_t size = 1024;
  26. static unsigned cpustep = 4;
  27. #else
  28. /* Must be bigger than available cache size per core, 64MiB should be enough */
  29. static size_t size = 64UL << 20;
  30. static unsigned cpustep = 1;
  31. #endif
  32. static unsigned noalone = 0;
  33. static unsigned iter = 30;
  34. static unsigned total_ncpus;
  35. static starpu_pthread_barrier_t barrier;
  36. static float *result;
  37. static void **buffers;
  38. void bw_func(void *descr[], void *arg)
  39. {
  40. void *src = buffers[starpu_worker_get_id()];
  41. void *dst = src + size;
  42. unsigned i;
  43. double start, stop;
  44. int ret;
  45. memset(src, 0, size);
  46. memset(dst, 0, size);
  47. STARPU_PTHREAD_BARRIER_WAIT(&barrier);
  48. start = starpu_timing_now();
  49. for (i = 0; i < iter; i++)
  50. {
  51. memcpy(dst, src, size);
  52. STARPU_SYNCHRONIZE();
  53. }
  54. stop = starpu_timing_now();
  55. STARPU_PTHREAD_BARRIER_WAIT(&barrier);
  56. result[starpu_worker_get_id()] = (size*iter) / (stop - start);
  57. }
  58. static struct starpu_codelet bw_codelet =
  59. {
  60. .cpu_funcs = {bw_func},
  61. .model = NULL,
  62. .nbuffers = 0,
  63. };
  64. static void usage(char **argv)
  65. {
  66. fprintf(stderr, "Usage: %s [-n iter] [-s size (MB)] [-i increment] [-a]\n", argv[0]);
  67. fprintf(stderr, "\t-n iter\tNumber of iterations\n");
  68. fprintf(stderr, "\t-s size\tBuffer size in MB\n");
  69. fprintf(stderr, "\t-i increment\tCpu number increment\n");
  70. fprintf(stderr, "\t-a\tDo not run the alone test\n");
  71. exit(EXIT_FAILURE);
  72. }
  73. static void parse_args(int argc, char **argv)
  74. {
  75. int c;
  76. while ((c = getopt(argc, argv, "n:s:c:ah")) != -1)
  77. switch(c)
  78. {
  79. case 'n':
  80. iter = atoi(optarg);
  81. break;
  82. case 's':
  83. size = (long)atoi(optarg) << 20;
  84. break;
  85. case 'c':
  86. cpustep = atoi(optarg);
  87. break;
  88. case 'a':
  89. noalone = 1;
  90. break;
  91. case 'h':
  92. usage(argv);
  93. break;
  94. }
  95. }
  96. static unsigned interleave(unsigned i)
  97. {
  98. /* TODO: rather distribute over hierarchy */
  99. if (total_ncpus > 1)
  100. return (i % (total_ncpus/2))*2 + i / (total_ncpus/2);
  101. else
  102. return 0;
  103. }
  104. static float bench(int *argc, char ***argv, unsigned nbusy, unsigned ncpus, int intl)
  105. {
  106. int ret;
  107. unsigned i;
  108. struct starpu_conf conf;
  109. float bw;
  110. starpu_conf_init(&conf);
  111. conf.ncuda = 0;
  112. conf.nopencl = 0;
  113. conf.nmic = 0;
  114. conf.nmpi_ms = 0;
  115. conf.ncpus = ncpus;
  116. if (intl && nbusy == ncpus)
  117. {
  118. conf.use_explicit_workers_bindid = 1;
  119. for (i = 0; i < ncpus; i++)
  120. conf.workers_bindid[i] = interleave(i);
  121. }
  122. ret = starpu_initialize(&conf, argc, argv);
  123. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  124. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  125. STARPU_PTHREAD_BARRIER_INIT(&barrier, NULL, nbusy);
  126. for (i = 0; i < nbusy; i++)
  127. {
  128. struct starpu_task *task = starpu_task_create();
  129. task->cl = &bw_codelet;
  130. task->execute_on_a_specific_worker = 1;
  131. if (intl && nbusy != ncpus)
  132. task->workerid = interleave(i);
  133. else
  134. task->workerid = i;
  135. ret = starpu_task_submit(task);
  136. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  137. }
  138. starpu_task_wait_for_all();
  139. starpu_shutdown();
  140. for (bw = 0., i = 0; i < nbusy; i++)
  141. {
  142. bw += result[i];
  143. }
  144. return bw;
  145. }
  146. int main(int argc, char **argv)
  147. {
  148. int ret;
  149. unsigned n;
  150. struct starpu_conf conf;
  151. float alone, alone_int, idle, idle_int;
  152. parse_args(argc, argv);
  153. starpu_conf_init(&conf);
  154. conf.ncuda = 0;
  155. conf.nopencl = 0;
  156. conf.nmic = 0;
  157. conf.nmpi_ms = 0;
  158. ret = starpu_initialize(&conf, &argc, &argv);
  159. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  160. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  161. total_ncpus = starpu_cpu_worker_get_count();
  162. starpu_shutdown();
  163. result = malloc(total_ncpus * sizeof(result[0]));
  164. buffers = malloc(total_ncpus * sizeof(*buffers));
  165. for (n = 0; n < total_ncpus; n++)
  166. {
  167. #ifdef STARPU_HAVE_POSIX_MEMALIGN
  168. ret = posix_memalign(&buffers[n], getpagesize(), 2*size);
  169. STARPU_ASSERT(ret == 0);
  170. #else
  171. buffers[n] = malloc(2*size);
  172. #endif
  173. }
  174. printf("# nw\talone\t\t+idle\t\tefficiency\talone int.l\t+idle int.l\tefficiency\n");
  175. for (n = cpustep; n <= total_ncpus; n += cpustep)
  176. {
  177. if (noalone)
  178. {
  179. alone = 0.;
  180. alone_int = 0.;
  181. }
  182. else
  183. {
  184. alone = bench(&argc, &argv, n, n, 0);
  185. alone_int = bench(&argc, &argv, n, n, 1);
  186. }
  187. idle = bench(&argc, &argv, n, total_ncpus, 0);
  188. idle_int = bench(&argc, &argv, n, total_ncpus, 1);
  189. printf("%d\t%f\t%f\t%f\t%f\t%f\t%f\n", n, alone/1000, idle/1000, idle*100/alone, alone_int/1000, idle_int/1000, idle_int*100/alone_int);
  190. fflush(stdout);
  191. }
  192. free(result);
  193. for (n = 0; n < total_ncpus; n++)
  194. free(buffers[n]);
  195. return EXIT_SUCCESS;
  196. enodev:
  197. fprintf(stderr, "WARNING: No one can execute this task\n");
  198. free(result);
  199. starpu_shutdown();
  200. return STARPU_TEST_SKIPPED;
  201. }