bandwidth.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdio.h>
  17. #include <unistd.h>
  18. #include <starpu.h>
  19. #include "../helper.h"
  20. /*
  21. * Measure the memory bandwidth available to kernels depending on the number of
  22. * kernels and number of idle workers.
  23. */
  24. #ifdef STARPU_QUICK_CHECK
  25. static size_t size = 1024;
  26. static unsigned cpustep = 4;
  27. #else
  28. /* Must be bigger than available cache size per core, 64MiB should be enough */
  29. static size_t size = 64UL << 20;
  30. static unsigned cpustep = 1;
  31. #endif
  32. static unsigned noalone = 0;
  33. static unsigned iter = 30;
  34. static unsigned total_ncpus;
  35. static starpu_pthread_barrier_t barrier;
  36. static float *result;
  37. static void **buffers;
  38. void bw_func(void *descr[], void *arg)
  39. {
  40. void *src = buffers[starpu_worker_get_id()];
  41. void *dst = (void*) ((uintptr_t)src + size);
  42. unsigned i;
  43. double start, stop;
  44. int ret;
  45. memset(src, 0, size);
  46. memset(dst, 0, size);
  47. STARPU_PTHREAD_BARRIER_WAIT(&barrier);
  48. start = starpu_timing_now();
  49. for (i = 0; i < iter; i++)
  50. {
  51. memcpy(dst, src, size);
  52. STARPU_SYNCHRONIZE();
  53. }
  54. stop = starpu_timing_now();
  55. STARPU_PTHREAD_BARRIER_WAIT(&barrier);
  56. result[starpu_worker_get_id()] = (size*iter) / (stop - start);
  57. }
  58. static struct starpu_codelet bw_codelet =
  59. {
  60. .cpu_funcs = {bw_func},
  61. .model = NULL,
  62. .nbuffers = 0,
  63. };
  /*
   * Print the command-line synopsis on stderr, then terminate the process with
   * EXIT_FAILURE.  argv[0] is printed as the program name.
   */
  static void usage(char **argv)
  {
      fprintf(stderr, "Usage: %s [-n iter] [-s size (MB)] [-c increment] [-a] [-h]\n", argv[0]);
      fprintf(stderr, "\t-n iter\tNumber of iterations\n");
      fprintf(stderr, "\t-s size\tBuffer size in MB\n");
      /* The letter must match the "c:" entry of the getopt() string used by
       * parse_args(); the help used to advertise -i, which is not accepted */
      fprintf(stderr, "\t-c increment\tCpu number increment\n");
      fprintf(stderr, "\t-a\tDo not run the alone test\n");
      fprintf(stderr, "\t-h\tPrint this help\n");
      exit(EXIT_FAILURE);
  }
  73. static void parse_args(int argc, char **argv)
  74. {
  75. int c;
  76. while ((c = getopt(argc, argv, "n:s:c:ah")) != -1)
  77. switch(c)
  78. {
  79. case 'n':
  80. iter = atoi(optarg);
  81. break;
  82. case 's':
  83. size = (long)atoi(optarg) << 20;
  84. break;
  85. case 'c':
  86. cpustep = atoi(optarg);
  87. break;
  88. case 'a':
  89. noalone = 1;
  90. break;
  91. case 'h':
  92. usage(argv);
  93. break;
  94. }
  95. }
  96. static unsigned interleave(unsigned i)
  97. {
  98. /* TODO: rather distribute over hierarchy */
  99. if (total_ncpus > 1)
  100. return (i % (total_ncpus/2))*2 + i / (total_ncpus/2);
  101. else
  102. return 0;
  103. }
  104. static float bench(int *argc, char ***argv, unsigned nbusy, unsigned ncpus, int intl)
  105. {
  106. int ret;
  107. unsigned i;
  108. struct starpu_conf conf;
  109. float bw;
  110. starpu_conf_init(&conf);
  111. conf.precedence_over_environment_variables = 1;
  112. conf.ncuda = 0;
  113. conf.nopencl = 0;
  114. conf.nmic = 0;
  115. conf.nmpi_ms = 0;
  116. conf.ncpus = ncpus;
  117. if (intl && nbusy == ncpus)
  118. {
  119. conf.use_explicit_workers_bindid = 1;
  120. for (i = 0; i < ncpus; i++)
  121. conf.workers_bindid[i] = interleave(i);
  122. }
  123. ret = starpu_initialize(&conf, argc, argv);
  124. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  125. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  126. STARPU_PTHREAD_BARRIER_INIT(&barrier, NULL, nbusy);
  127. for (i = 0; i < nbusy; i++)
  128. {
  129. struct starpu_task *task = starpu_task_create();
  130. task->cl = &bw_codelet;
  131. task->execute_on_a_specific_worker = 1;
  132. if (intl && nbusy != ncpus)
  133. task->workerid = interleave(i);
  134. else
  135. task->workerid = i;
  136. ret = starpu_task_submit(task);
  137. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  138. }
  139. starpu_task_wait_for_all();
  140. starpu_shutdown();
  141. for (bw = 0., i = 0; i < nbusy; i++)
  142. {
  143. bw += result[i];
  144. }
  145. return bw;
  146. }
  147. int main(int argc, char **argv)
  148. {
  149. int ret;
  150. unsigned n;
  151. struct starpu_conf conf;
  152. float alone, alone_int, idle, idle_int;
  153. parse_args(argc, argv);
  154. starpu_conf_init(&conf);
  155. conf.precedence_over_environment_variables = 1;
  156. conf.ncuda = 0;
  157. conf.nopencl = 0;
  158. conf.nmic = 0;
  159. conf.nmpi_ms = 0;
  160. ret = starpu_initialize(&conf, &argc, &argv);
  161. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  162. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  163. total_ncpus = starpu_cpu_worker_get_count();
  164. starpu_shutdown();
  165. if (total_ncpus == 0)
  166. return STARPU_TEST_SKIPPED;
  167. result = malloc(total_ncpus * sizeof(result[0]));
  168. buffers = malloc(total_ncpus * sizeof(*buffers));
  169. for (n = 0; n < total_ncpus; n++)
  170. {
  171. #ifdef STARPU_HAVE_POSIX_MEMALIGN
  172. ret = posix_memalign(&buffers[n], getpagesize(), 2*size);
  173. STARPU_ASSERT(ret == 0);
  174. #else
  175. buffers[n] = malloc(2*size);
  176. #endif
  177. }
  178. printf("# nw\talone\t\t+idle\t\tefficiency\talone int.l\t+idle int.l\tefficiency\n");
  179. for (n = cpustep; n <= total_ncpus; n += cpustep)
  180. {
  181. if (noalone)
  182. {
  183. alone = 0.;
  184. alone_int = 0.;
  185. }
  186. else
  187. {
  188. alone = bench(&argc, &argv, n, n, 0);
  189. alone_int = bench(&argc, &argv, n, n, 1);
  190. }
  191. idle = bench(&argc, &argv, n, total_ncpus, 0);
  192. idle_int = bench(&argc, &argv, n, total_ncpus, 1);
  193. printf("%d\t%f\t%f\t%f\t%f\t%f\t%f\n", n, alone/1000, idle/1000, idle*100/alone, alone_int/1000, idle_int/1000, idle_int*100/alone_int);
  194. fflush(stdout);
  195. }
  196. free(result);
  197. for (n = 0; n < total_ncpus; n++)
  198. free(buffers[n]);
  199. return EXIT_SUCCESS;
  200. }