starpurm_dlb.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2017-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. /* CPUSET routines */
  17. #ifndef _GNU_SOURCE
  18. #define _GNU_SOURCE
  19. #endif
  20. #include <sched.h>
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. #include <assert.h>
  25. #include <common/config.h>
  26. #include <hwloc.h>
  27. #ifdef HAVE_HWLOC_GLIBC_SCHED_H
  28. #include <hwloc/glibc-sched.h>
  29. #endif
  30. #include <pthread.h>
  31. #include <starpu.h>
  32. #include <starpurm.h>
  33. #include <starpurm_private.h>
  34. #ifndef STARPURM_HAVE_DLB
  35. #error "STARPU-RM DLB support not enabled"
  36. #endif
  37. #include <dlb_sp.h>
  38. #include <dlb_errors.h>
/*
 * DLB interfacing
 */
/* Handle returned by DLB_Init_sp() in starpurm_dlb_init(); reset by
 * starpurm_dlb_exit(). The notification routines test it against NULL to
 * know whether DLB requests can be issued. */
static dlb_handler_t dlb_handle;
/* glibc form of starpurm_process_cpuset, passed to DLB_Init_sp(). */
static cpu_set_t starpurm_process_mask;
/* Intersection of the resource manager's selected_cpuset and
 * initially_owned_cpuset_mask, computed in starpurm_dlb_init(). */
static hwloc_cpuset_t starpurm_process_cpuset;
/* Resource manager instance registered by starpurm_dlb_init(). */
static struct s_starpurm *_starpurm = NULL;
/* Held around accesses to dlb_handle in this file. */
static pthread_mutex_t dlb_handle_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Maps a glibc CPU index to the id of the unit recorded for it, or -1 when
 * no unit has been recorded for that CPU. */
static int glibc_cpuid_to_unitid[CPU_SETSIZE];
/* Heap array of rm->nunits entries: first CPU index set in each unit's glibc
 * mask, or -1 when the mask is empty. */
static int *unitid_to_glibc_cpuid = NULL;
  49. static const char * _dlb_error_str(int error_code)
  50. {
  51. const char *s = NULL;
  52. switch (error_code)
  53. {
  54. case DLB_NOUPDT:
  55. s="DLB_NOUPDT";
  56. break;
  57. case DLB_NOTED:
  58. s="DLB_NOTED";
  59. break;
  60. case DLB_SUCCESS:
  61. s="DLB_SUCCESS";
  62. break;
  63. case DLB_ERR_UNKNOWN:
  64. s="DLB_ERR_UNKNOWN";
  65. break;
  66. case DLB_ERR_NOINIT:
  67. s="DLB_ERR_NOINIT";
  68. break;
  69. case DLB_ERR_INIT:
  70. s="DLB_ERR_INIT";
  71. break;
  72. case DLB_ERR_DISBLD:
  73. s="DLB_ERR_DISBLD";
  74. break;
  75. case DLB_ERR_NOSHMEM:
  76. s="DLB_ERR_NOSHMEM";
  77. break;
  78. case DLB_ERR_NOPROC:
  79. s="DLB_ERR_NOPROC";
  80. break;
  81. case DLB_ERR_PDIRTY:
  82. s="DLB_ERR_PDIRTY";
  83. break;
  84. case DLB_ERR_PERM:
  85. s="DLB_ERR_PERM";
  86. break;
  87. case DLB_ERR_TIMEOUT:
  88. s="DLB_ERR_TIMEOUT";
  89. break;
  90. case DLB_ERR_NOCBK:
  91. s="DLB_ERR_NOCBK";
  92. break;
  93. case DLB_ERR_NOENT:
  94. s="DLB_ERR_NOENT";
  95. break;
  96. case DLB_ERR_NOCOMP:
  97. s="DLB_ERR_NOCOMP";
  98. break;
  99. case DLB_ERR_REQST:
  100. s="DLB_ERR_REQST";
  101. break;
  102. case DLB_ERR_NOMEM:
  103. s="DLB_ERR_NOMEM";
  104. break;
  105. case DLB_ERR_NOPOL:
  106. s="DLB_ERR_NOPOL";
  107. break;
  108. default:
  109. s = "<unknown DLB error code>";
  110. break;
  111. }
  112. return s;
  113. }
  114. #define _dlb_check(s,r) do { if ((r) != DLB_SUCCESS) {fprintf(stderr, "%s:%d, %s - DLB call '%s' %s %d (%s)\n",__FILE__, __LINE__, __func__, (s), (r)>0?"returned warning code":"failed with error code", (r), _dlb_error_str((r))); assert(dlb_ret >= DLB_SUCCESS); }} while (0)
  115. #if 0
  116. /* unused for now */
  117. static void _glibc_cpuset_to_hwloc_cpuset(const cpu_set_t *glibc_cpuset, hwloc_cpuset_t *hwloc_cpuset)
  118. {
  119. assert(_starpurm != NULL);
  120. assert(_starpurm->state != state_uninitialized);
  121. struct s_starpurm *rm = _starpurm;
  122. int status = hwloc_cpuset_from_glibc_sched_affinity(rm->topology, *hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t));
  123. assert(status == 0);
  124. }
  125. #endif
  126. static void _hwloc_cpuset_to_glibc_cpuset(const hwloc_cpuset_t hwloc_cpuset, cpu_set_t *glibc_cpuset)
  127. {
  128. assert(_starpurm != NULL);
  129. assert(_starpurm->state != state_uninitialized);
  130. struct s_starpurm *rm = _starpurm;
  131. int status = hwloc_cpuset_to_glibc_sched_affinity(rm->topology, hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t));
  132. assert(status == 0);
  133. }
  134. int starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(const hwloc_cpuset_t hwloc_workers_cpuset)
  135. {
  136. int status = 0;
  137. pthread_mutex_lock(&dlb_handle_mutex);
  138. if (dlb_handle != NULL)
  139. {
  140. hwloc_cpuset_t hwloc_to_lend_cpuset = hwloc_bitmap_alloc();
  141. hwloc_cpuset_t hwloc_to_return_cpuset = hwloc_bitmap_alloc();
  142. hwloc_bitmap_zero(hwloc_to_lend_cpuset);
  143. hwloc_bitmap_zero(hwloc_to_return_cpuset);
  144. hwloc_bitmap_and(hwloc_to_lend_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset);
  145. hwloc_bitmap_andnot(hwloc_to_return_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset);
  146. #ifdef STARPURM_DLB_VERBOSE
  147. {
  148. char * s_to_lend = NULL;
  149. char * s_to_return = NULL;
  150. hwloc_bitmap_asprintf(&s_to_lend, hwloc_to_lend_cpuset);
  151. hwloc_bitmap_asprintf(&s_to_return, hwloc_to_return_cpuset);
  152. fprintf(stderr, "%s: to_lend='%s', to_return='%s'\n", __func__, s_to_lend, s_to_return);
  153. free(s_to_lend);
  154. free(s_to_return);
  155. }
  156. #endif
  157. if (!hwloc_bitmap_iszero(hwloc_to_lend_cpuset))
  158. {
  159. cpu_set_t glibc_to_lend_cpuset;
  160. CPU_ZERO(&glibc_to_lend_cpuset);
  161. _hwloc_cpuset_to_glibc_cpuset(hwloc_to_lend_cpuset, &glibc_to_lend_cpuset);
  162. int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_lend_cpuset);
  163. _dlb_check("DLB_LendCpuMask_sp", dlb_ret);
  164. }
  165. if (!hwloc_bitmap_iszero(hwloc_to_return_cpuset))
  166. {
  167. cpu_set_t glibc_to_return_cpuset;
  168. CPU_ZERO(&glibc_to_return_cpuset);
  169. _hwloc_cpuset_to_glibc_cpuset(hwloc_to_return_cpuset, &glibc_to_return_cpuset);
  170. /* Use DLB_Lend for returning borrowed units. DLB_Return seems to require that
  171. * a reclaim has previously been emitted by the unit owning runtime system */
  172. #if 0
  173. int dlb_ret = DLB_ReturnCpuMask_sp(dlb_handle, &glibc_to_return_cpuset);
  174. _dlb_check("DLB_ReturnCpuMask_sp", dlb_ret);
  175. #else
  176. int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_return_cpuset);
  177. _dlb_check("DLB_LendCpuMask_sp", dlb_ret);
  178. #endif
  179. }
  180. hwloc_bitmap_free(hwloc_to_lend_cpuset);
  181. hwloc_bitmap_free(hwloc_to_return_cpuset);
  182. status = 1;
  183. }
  184. pthread_mutex_unlock(&dlb_handle_mutex);
  185. return status;
  186. }
  187. int starpurm_dlb_notify_starpu_worker_mask_waking_up(const hwloc_cpuset_t hwloc_workers_cpuset)
  188. {
  189. int status = 0;
  190. pthread_mutex_lock(&dlb_handle_mutex);
  191. if (dlb_handle != NULL)
  192. {
  193. hwloc_cpuset_t hwloc_to_reclaim_cpuset = hwloc_bitmap_alloc();
  194. hwloc_cpuset_t hwloc_to_borrow_cpuset = hwloc_bitmap_alloc();
  195. hwloc_bitmap_zero(hwloc_to_reclaim_cpuset);
  196. hwloc_bitmap_zero(hwloc_to_borrow_cpuset);
  197. hwloc_bitmap_and(hwloc_to_reclaim_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset);
  198. hwloc_bitmap_andnot(hwloc_to_borrow_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset);
  199. #ifdef STARPURM_DLB_VERBOSE
  200. {
  201. char * s_to_reclaim = NULL;
  202. char * s_to_borrow = NULL;
  203. hwloc_bitmap_asprintf(&s_to_reclaim, hwloc_to_reclaim_cpuset);
  204. hwloc_bitmap_asprintf(&s_to_borrow, hwloc_to_borrow_cpuset);
  205. fprintf(stderr, "%s: to_reclaim='%s', to_borrow='%s'\n", __func__, s_to_reclaim, s_to_borrow);
  206. free(s_to_reclaim);
  207. free(s_to_borrow);
  208. }
  209. #endif
  210. if (!hwloc_bitmap_iszero(hwloc_to_reclaim_cpuset))
  211. {
  212. cpu_set_t glibc_to_reclaim_cpuset;
  213. CPU_ZERO(&glibc_to_reclaim_cpuset);
  214. _hwloc_cpuset_to_glibc_cpuset(hwloc_to_reclaim_cpuset, &glibc_to_reclaim_cpuset);
  215. int dlb_ret = DLB_ReclaimCpuMask_sp(dlb_handle, &glibc_to_reclaim_cpuset);
  216. _dlb_check("DLB_ReclaimCpuMask_sp", dlb_ret);
  217. }
  218. if (!hwloc_bitmap_iszero(hwloc_to_borrow_cpuset))
  219. {
  220. cpu_set_t glibc_to_borrow_cpuset;
  221. CPU_ZERO(&glibc_to_borrow_cpuset);
  222. _hwloc_cpuset_to_glibc_cpuset(hwloc_to_borrow_cpuset, &glibc_to_borrow_cpuset);
  223. int dlb_ret = DLB_BorrowCpuMask_sp(dlb_handle, &glibc_to_borrow_cpuset);
  224. _dlb_check("DLB_BorrowCpuMask_sp", dlb_ret);
  225. }
  226. hwloc_bitmap_free(hwloc_to_reclaim_cpuset);
  227. hwloc_bitmap_free(hwloc_to_borrow_cpuset);
  228. status = 1;
  229. }
  230. pthread_mutex_unlock(&dlb_handle_mutex);
  231. return status;
  232. }
  233. #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS
  234. #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG
  235. static void _dlb_callback_enable_cpu(int cpuid, void *arg)
  236. #else
  237. static void _dlb_callback_enable_cpu(int cpuid)
  238. #endif
  239. {
  240. #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG
  241. (void) arg;
  242. #endif
  243. int unitid = glibc_cpuid_to_unitid[cpuid];
  244. #ifdef STARPURM_DLB_VERBOSE
  245. fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid);
  246. #endif
  247. if (unitid != -1)
  248. {
  249. starpurm_enqueue_event_cpu_unit_available(unitid);
  250. }
  251. }
  252. #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG
  253. static void _dlb_callback_disable_cpu(int cpuid, void *arg)
  254. #else
  255. static void _dlb_callback_disable_cpu(int cpuid)
  256. #endif
  257. {
  258. #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG
  259. (void) arg;
  260. #endif
  261. int unitid = glibc_cpuid_to_unitid[cpuid];
  262. #ifdef STARPURM_DLB_VERBOSE
  263. fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid);
  264. #endif
  265. if (unitid != -1)
  266. {
  267. /* nothing */
  268. }
  269. }
  270. #endif
  271. void starpurm_dlb_init(struct s_starpurm *rm)
  272. {
  273. _starpurm = rm;
  274. {
  275. int unitid;
  276. int cpuid;
  277. unitid_to_glibc_cpuid = malloc(rm->nunits * sizeof(*unitid_to_glibc_cpuid));
  278. for (cpuid = 0; cpuid<CPU_SETSIZE; cpuid++)
  279. {
  280. glibc_cpuid_to_unitid[cpuid] = -1;
  281. }
  282. for (unitid = 0; unitid < rm->nunits; unitid++)
  283. {
  284. hwloc_cpuset_t unit_cpuset = starpurm_get_unit_cpuset(unitid);
  285. cpu_set_t unit_mask;
  286. CPU_ZERO(&unit_mask);
  287. _hwloc_cpuset_to_glibc_cpuset(unit_cpuset, &unit_mask);
  288. unitid_to_glibc_cpuid[unitid] = -1;
  289. for (cpuid = 0; cpuid<CPU_SETSIZE; cpuid++)
  290. {
  291. if (CPU_ISSET(cpuid, &unit_mask))
  292. {
  293. /* assume no overlap on units cpuid */
  294. assert(glibc_cpuid_to_unitid[cpuid] == -1);
  295. unitid_to_glibc_cpuid[unitid] = cpuid;
  296. glibc_cpuid_to_unitid[cpuid] = unitid;
  297. break;
  298. }
  299. }
  300. #ifdef STARPURM_DLB_VERBOSE
  301. {
  302. char * s_unit = NULL;
  303. hwloc_bitmap_asprintf(&s_unit, unit_cpuset);
  304. fprintf(stderr, "%s: unitid=%d, cpuid=%d, unit hwloc cpuset=%s\n", __func__, unitid, cpuid, s_unit);
  305. free(s_unit);
  306. }
  307. #endif
  308. hwloc_bitmap_free(unit_cpuset);
  309. }
  310. }
  311. CPU_ZERO(&starpurm_process_mask);
  312. starpurm_process_cpuset = hwloc_bitmap_dup(rm->selected_cpuset);
  313. hwloc_bitmap_and(starpurm_process_cpuset, starpurm_process_cpuset, rm->initially_owned_cpuset_mask);
  314. _hwloc_cpuset_to_glibc_cpuset(starpurm_process_cpuset, &starpurm_process_mask);
  315. #ifdef STARPURM_DLB_VERBOSE
  316. {
  317. char * s_reachable = NULL;
  318. char * s_initially_owned = NULL;
  319. hwloc_bitmap_asprintf(&s_reachable, rm->selected_cpuset);
  320. hwloc_bitmap_asprintf(&s_initially_owned, starpurm_process_cpuset);
  321. fprintf(stderr, "%s: StarPU reachable units='%s', StarPU initially owned units='%s'\n", __func__, s_reachable, s_initially_owned);
  322. free(s_reachable);
  323. free(s_initially_owned);
  324. }
  325. #endif
  326. pthread_mutex_lock(&dlb_handle_mutex);
  327. /* TODO: autodetect DLB policy according to DLB version */
  328. #if 1
  329. dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--lewi=yes --drom=no --mode=async");
  330. #else
  331. dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--policy=new --drom=no --mode=async");
  332. #endif
  333. /* cpu-based callbacks are mutually exclusive with mask-based callbacks,
  334. * we only register cpu-based callbacks */
  335. int dlb_ret;
  336. #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS
  337. #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG
  338. dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, NULL);
  339. _dlb_check("DLB_CallbackSet_sp", dlb_ret);
  340. dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu, NULL);
  341. _dlb_check("DLB_CallbackSet_sp", dlb_ret);
  342. #else
  343. dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu);
  344. _dlb_check("DLB_CallbackSet_sp", dlb_ret);
  345. dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu);
  346. _dlb_check("DLB_CallbackSet_sp", dlb_ret);
  347. #endif
  348. #endif
  349. dlb_ret = DLB_Enable_sp(dlb_handle);
  350. _dlb_check("DLB_Enable_sp", dlb_ret);
  351. pthread_mutex_unlock(&dlb_handle_mutex);
  352. }
  353. void starpurm_dlb_exit(void)
  354. {
  355. pthread_mutex_lock(&dlb_handle_mutex);
  356. dlb_handler_t dlb_handle_save = dlb_handle;
  357. dlb_handle = 0;
  358. pthread_mutex_unlock(&dlb_handle_mutex);
  359. /* lend every resources that StarPU may still have */
  360. DLB_Lend_sp(dlb_handle_save);
  361. DLB_Return_sp(dlb_handle_save);
  362. pthread_mutex_lock(&dlb_handle_mutex);
  363. DLB_Disable_sp(dlb_handle_save);
  364. DLB_Finalize_sp(dlb_handle_save);
  365. hwloc_bitmap_free(starpurm_process_cpuset);
  366. free(unitid_to_glibc_cpuid);
  367. pthread_mutex_unlock(&dlb_handle_mutex);
  368. }