06_spawn.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /* StarPU --- Resource Management Layer.
  2. *
  3. * Copyright (C) 2017, 2018 Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. /* This example shows a basic StarPU vector scale app on top of StarPURM,
  17. * making use of both the main RM API and the spawn_kernel_on_cpus API func */
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <assert.h>
  21. #include <starpu.h>
  22. #include <starpurm.h>
  23. static int rm_cpu_type_id = -1;
  24. static int rm_nb_cpu_units = 0;
  25. static void usage(void);
  26. static void test1(const int N);
  27. static void test2(const int N, const int task_mult);
  28. static void init_rm_infos(void);
  29. /* vector scale codelet */
  30. static void vector_scale_func(void *cl_buffers[], void *cl_arg)
  31. {
  32. double scalar = -1.0;
  33. int n = STARPU_VECTOR_GET_NX(cl_buffers[0]);
  34. double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]);
  35. int i;
  36. starpu_codelet_unpack_args(cl_arg, &scalar);
  37. int workerid = starpu_worker_get_id();
  38. hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid);
  39. {
  40. int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset);
  41. char str1[strl1+1];
  42. hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset);
  43. printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1);
  44. }
  45. hwloc_bitmap_free(worker_cpuset);
  46. for (i = 0; i < n; i++)
  47. {
  48. vector[i] *= scalar;
  49. }
  50. }
  51. static struct starpu_codelet vector_scale_cl =
  52. {
  53. .cpu_funcs = {vector_scale_func},
  54. .nbuffers = 1
  55. };
  56. /* main routines */
  57. static void usage(void)
  58. {
  59. fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n");
  60. exit(1);
  61. }
  62. static void test1(const int N)
  63. {
  64. double *vector = NULL;
  65. const double scalar = 2.0;
  66. starpu_data_handle_t vector_handle;
  67. int ret;
  68. vector = malloc(N * sizeof(*vector));
  69. {
  70. int i;
  71. for (i = 0; i < N; i++)
  72. {
  73. vector[i] = i;
  74. }
  75. }
  76. starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector));
  77. ret = starpu_task_insert(&vector_scale_cl,
  78. STARPU_RW, vector_handle,
  79. STARPU_VALUE, &scalar, sizeof(scalar),
  80. 0);
  81. assert(ret == 0);
  82. starpu_task_wait_for_all();
  83. starpu_data_unregister(vector_handle);
  84. {
  85. int i;
  86. for (i = 0; i < N; i++)
  87. {
  88. double d_i = i;
  89. if (vector[i] != d_i*scalar)
  90. {
  91. fprintf(stderr, "%s: check_failed\n", __func__);
  92. exit(1);
  93. }
  94. }
  95. }
  96. free(vector);
  97. }
  98. static void test2(const int N, const int task_mult)
  99. {
  100. double *vector = NULL;
  101. const double scalar = 3.0;
  102. starpu_data_handle_t vector_handle;
  103. int ret;
  104. vector = malloc(N * sizeof(*vector));
  105. {
  106. int i;
  107. for (i = 0; i < N; i++)
  108. {
  109. vector[i] = i;
  110. }
  111. }
  112. starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector));
  113. struct starpu_data_filter partition_filter =
  114. {
  115. .filter_func = starpu_vector_filter_block,
  116. .nchildren = rm_nb_cpu_units * task_mult
  117. };
  118. starpu_data_partition(vector_handle, &partition_filter);
  119. {
  120. int i;
  121. for (i = 0; i < rm_nb_cpu_units*task_mult; i++)
  122. {
  123. starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i);
  124. ret = starpu_task_insert(&vector_scale_cl,
  125. STARPU_RW, sub_vector_handle,
  126. STARPU_VALUE, &scalar, sizeof(scalar),
  127. 0);
  128. assert(ret == 0);
  129. }
  130. }
  131. starpu_task_wait_for_all();
  132. starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM);
  133. starpu_data_unregister(vector_handle);
  134. {
  135. int i;
  136. for (i = 0; i < N; i++)
  137. {
  138. double d_i = i;
  139. if (vector[i] != d_i*scalar)
  140. {
  141. fprintf(stderr, "%s: check_failed\n", __func__);
  142. exit(1);
  143. }
  144. }
  145. }
  146. free(vector);
  147. }
  148. static void init_rm_infos(void)
  149. {
  150. int cpu_type = starpurm_get_device_type_id("cpu");
  151. int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type);
  152. if (nb_cpu_units < 1)
  153. {
  154. /* No CPU unit available. */
  155. exit(77);
  156. }
  157. rm_cpu_type_id = cpu_type;
  158. rm_nb_cpu_units = nb_cpu_units;
  159. }
  160. static void kernel_to_spawn(void *args)
  161. {
  162. int param_N = *(int*)args;
  163. test1(param_N);
  164. test2(param_N, 1);
  165. test2(param_N, 10);
  166. test2(param_N, 100);
  167. }
  168. int main(int argc, char *argv[])
  169. {
  170. int param_N = 1000000;
  171. int drs_enabled;
  172. if (argc > 1)
  173. {
  174. param_N = atoi(argv[1]);
  175. if (param_N < 1)
  176. {
  177. usage();
  178. }
  179. }
  180. starpurm_initialize();
  181. init_rm_infos();
  182. if (rm_nb_cpu_units > 1)
  183. {
  184. const int half_nb_cpus = rm_nb_cpu_units/2;
  185. starpurm_set_drs_enable(NULL);
  186. drs_enabled = starpurm_drs_enabled_p();
  187. assert(drs_enabled != 0);
  188. {
  189. hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset();
  190. {
  191. int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset);
  192. char str1[strl1+1];
  193. hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset);
  194. printf("all cpus cpuset = %s\n", str1);
  195. }
  196. int first_idx = hwloc_bitmap_first(cpu_cpuset);
  197. int last_idx = hwloc_bitmap_last(cpu_cpuset);
  198. hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc();
  199. assert(sel_cpuset != NULL);
  200. int count = 0;
  201. int idx = first_idx;
  202. while (idx != -1 && idx <= last_idx && count < half_nb_cpus)
  203. {
  204. if (hwloc_bitmap_isset(cpu_cpuset, idx))
  205. {
  206. hwloc_bitmap_set(sel_cpuset, idx);
  207. count ++;
  208. }
  209. idx = hwloc_bitmap_next(cpu_cpuset, idx);
  210. }
  211. assert(count == half_nb_cpus);
  212. {
  213. int strl1 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset);
  214. char str1[strl1+1];
  215. hwloc_bitmap_snprintf(str1, strl1+1, sel_cpuset);
  216. printf("spawning a kernel on cpuset = %s\n", str1);
  217. }
  218. starpurm_spawn_kernel_on_cpus(NULL, kernel_to_spawn, &param_N, sel_cpuset);
  219. hwloc_bitmap_free(sel_cpuset);
  220. hwloc_bitmap_free(cpu_cpuset);
  221. }
  222. printf("withdrawing %d cpus from StarPU\n", half_nb_cpus);
  223. starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus);
  224. test1(param_N);
  225. test2(param_N, 1);
  226. test2(param_N, 10);
  227. test2(param_N, 100);
  228. printf("assigning %d cpus to StarPU\n", half_nb_cpus);
  229. starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus);
  230. test1(param_N);
  231. test2(param_N, 1);
  232. test2(param_N, 10);
  233. test2(param_N, 100);
  234. starpurm_set_drs_disable(NULL);
  235. drs_enabled = starpurm_drs_enabled_p();
  236. assert(drs_enabled == 0);
  237. }
  238. starpurm_shutdown();
  239. return 0;
  240. }