/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011-2014,2016-2017 Inria
 * Copyright (C) 2009-2017 Université de Bordeaux
 * Copyright (C) 2010-2017 CNRS
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <errno.h>
#include <core/workers.h>
#include <core/disk.h>
#include <common/config.h>
#include <common/fxt.h>
#include <starpu.h>
#include <drivers/opencl/driver_opencl.h>
#include <datawizard/memory_manager.h>
#include <datawizard/memory_nodes.h>
#include <datawizard/malloc.h>
#include <core/simgrid.h>
#include <core/task.h>

#ifdef STARPU_SIMGRID
#include <sys/mman.h>
#include <fcntl.h>
#include <smpi/smpi.h>
#endif

#ifdef STARPU_HAVE_HWLOC
#include <hwloc.h>
#ifndef HWLOC_API_VERSION
#define HWLOC_OBJ_PU HWLOC_OBJ_PROC
#endif
#if HWLOC_API_VERSION < 0x00010b00
#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
#endif
#endif

#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifndef MAP_POPULATE
#define MAP_POPULATE 0
#endif
static size_t _malloc_align = sizeof(void*);

static int disable_pinning;
static int malloc_on_node_default_flags[STARPU_MAXNODES];

/* This file is used for implementing "folded" allocation */
#ifdef STARPU_SIMGRID
#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
/* TODO: drop when simgrid 3.15 is widely used by the people who need this feature */
static int bogusfile = -1;
static unsigned long _starpu_malloc_simulation_fold;
#endif
#endif
void starpu_malloc_set_align(size_t align)
{
	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
	if (_malloc_align < align)
		_malloc_align = align;
}
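
/* A minimal usage sketch (illustration only, not part of the original file):
 * request 64-byte alignment, e.g. for vectorized kernels, before performing
 * StarPU allocations.  The alignment only ever grows, so several libraries
 * can each request what they need:
 *
 *	starpu_malloc_set_align(64);
 *	float *v;
 *	starpu_malloc((void**) &v, n * sizeof(*v));	// n is a placeholder
 *	...
 *	starpu_free(v);
 */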
#if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
struct malloc_pinned_codelet_struct
{
	void **ptr;
	size_t dim;
};
#endif

/* Would be difficult to do it this way, we need to remember the cl_mem to be able to free it later... */
//#ifdef STARPU_USE_OPENCL
//static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
//{
//	struct malloc_pinned_codelet_struct *s = arg;
//	// _STARPU_MALLOC(*(s->ptr), s->dim);
//	starpu_opencl_allocate_memory(devid, (void **)(s->ptr), s->dim, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR);
//}
//#endif

#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
{
	struct malloc_pinned_codelet_struct *s = arg;
	cudaError_t cures;
	cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable);
	if (STARPU_UNLIKELY(cures))
		STARPU_CUDA_REPORT_ERROR(cures);
}
#endif

#if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)) && !defined(STARPU_SIMGRID)// || defined(STARPU_USE_OPENCL)
static struct starpu_perfmodel malloc_pinned_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "malloc_pinned"
};

static struct starpu_codelet malloc_pinned_cl =
{
	.cuda_funcs = {malloc_pinned_cuda_codelet},
//#ifdef STARPU_USE_OPENCL
//	.opencl_funcs = {malloc_pinned_opencl_codelet},
//#endif
	.nbuffers = 0,
	.model = &malloc_pinned_model
};
#endif
/* Allocation in CPU RAM */
int starpu_malloc_flags(void **A, size_t dim, int flags)
{
	return _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, A, dim, flags);
}

int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int flags)
{
	int ret = 0;

	STARPU_ASSERT(A);

	if (flags & STARPU_MALLOC_COUNT)
	{
		if (!(flags & STARPU_MALLOC_NORECLAIM))
			while (starpu_memory_allocate(dst_node, dim, flags) != 0)
			{
				size_t freed;
				size_t reclaim = 2 * dim;
				_STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", (long)reclaim);
				_STARPU_TRACE_START_MEMRECLAIM(dst_node,0);
				freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim);
				_STARPU_TRACE_END_MEMRECLAIM(dst_node,0);
				if (freed < dim && !(flags & STARPU_MEMORY_WAIT))
				{
					// We could not reclaim enough memory
					*A = NULL;
					return -ENOMEM;
				}
			}
		else if (flags & STARPU_MEMORY_WAIT)
			starpu_memory_allocate(dst_node, dim, flags);
		else
			starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW);
	}
	if (flags & STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0)
	{
		if (_starpu_can_submit_cuda_task())
		{
#ifdef STARPU_SIMGRID
			/* FIXME: CUDA seems to be taking 650µs every 1MiB.
			 * Ideally we would simulate this batching in 1MiB requests
			 * instead of computing an average value.
			 */
			if (_starpu_simgrid_cuda_malloc_cost())
				MSG_process_sleep((float) dim * 0.000650 / 1048576.);
#else /* STARPU_SIMGRID */
#ifdef STARPU_USE_CUDA
#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
			cudaError_t cures;
			cures = cudaHostAlloc(A, dim, cudaHostAllocPortable);
			if (STARPU_UNLIKELY(cures))
			{
				STARPU_CUDA_REPORT_ERROR(cures);
				ret = -ENOMEM;
			}
			goto end;
#else
			int push_res;
			/* Old versions of CUDA are not thread-safe, so we have to
			 * run cudaHostAlloc from CUDA workers */
			STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned allocation must not be done from task or callback");

			struct malloc_pinned_codelet_struct s =
			{
				.ptr = A,
				.dim = dim
			};

			malloc_pinned_cl.where = STARPU_CUDA;
			struct starpu_task *task = starpu_task_create();
			task->name = "cuda_malloc_pinned";
			task->callback_func = NULL;
			task->cl = &malloc_pinned_cl;
			task->cl_arg = &s;
			task->synchronous = 1;

			_starpu_exclude_task_from_dag(task);

			push_res = _starpu_task_submit_internally(task);
			STARPU_ASSERT(push_res != -ENODEV);
			goto end;
#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */
#endif /* STARPU_USE_CUDA */
//		}
//		else if (_starpu_can_submit_opencl_task())
//		{
//#ifdef STARPU_USE_OPENCL
//			int push_res;
//
//			STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL allocation must not be done from task or callback");
//
//			struct malloc_pinned_codelet_struct s =
//			{
//				.ptr = A,
//				.dim = dim
//			};
//
//			malloc_pinned_cl.where = STARPU_OPENCL;
//			struct starpu_task *task = starpu_task_create();
//			task->name = "opencl_malloc_pinned";
//			task->callback_func = NULL;
//			task->cl = &malloc_pinned_cl;
//			task->cl_arg = &s;
//			task->synchronous = 1;
//
//			_starpu_exclude_task_from_dag(task);
//
//			push_res = _starpu_task_submit_internally(task);
//			STARPU_ASSERT(push_res != -ENODEV);
//			goto end;
//#endif /* STARPU_USE_OPENCL */
#endif /* STARPU_SIMGRID */
		}
	}
#ifdef STARPU_SIMGRID
	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
	{
#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
		*A = SMPI_SHARED_MALLOC(dim);
#else
		/* TODO: drop when simgrid 3.15 is widely used by the people who need this feature */
		/* Use "folded" allocation: the same file is mapped several
		 * times contiguously, to get a memory area one can read/write,
		 * without actually consuming memory */
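		/* Illustration (not part of the original file): with the
		 * default STARPU_MALLOC_SIMULATION_FOLD of 1, i.e. a 1MiB
		 * fold (see _starpu_malloc_init below), a 100MiB request
		 * reserves 100MiB of address space but maps the same 1MiB
		 * bogus file 100 times into it, so only ~1MiB of physical
		 * memory is ever touched:
		 *
		 *	[fold][fold][fold]...[fold]   <- 100 mappings, one file
		 *
		 * Reads and writes work, but distinct folds alias each other,
		 * which is fine for simulation since data values do not
		 * matter there. */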
		/* First reserve the memory area */
		void *buf = mmap(NULL, dim, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		unsigned i;
		if (buf == MAP_FAILED)
		{
			_STARPU_DISP("Warning: could not allocate %luMiB of memory, you need to run \"sysctl vm.overcommit_memory=1\" as root to allow such big allocations\n", (unsigned long) (dim >> 20));
			ret = -ENOMEM;
			*A = NULL;
		}
		else
		{
			if (bogusfile == -1)
			{
				char *path = starpu_getenv("TMPDIR");
				if (!path)
					path = starpu_getenv("TEMP");
				if (!path)
					path = starpu_getenv("TMP");
				if (!path)
					path = "/tmp";
				/* Create the bogus file if not done already */
				char *name = _starpu_mktemp(path, O_RDWR | O_BINARY, &bogusfile);
				char *dumb;
				if (!name)
				{
					ret = errno;
					munmap(buf, dim);
					*A = NULL;
					goto end;
				}
				unlink(name);
				free(name);
				_STARPU_CALLOC(dumb, 1, _starpu_malloc_simulation_fold);
				write(bogusfile, dumb, _starpu_malloc_simulation_fold);
				free(dumb);
			}
			/* Map the bogus file in place of the anonymous memory */
			for (i = 0; i < dim / _starpu_malloc_simulation_fold; i++)
			{
				void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold);
				void *res = mmap(pos, _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0);
				STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno));
			}
			if (dim % _starpu_malloc_simulation_fold)
			{
				void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold);
				void *res = mmap(pos, dim % _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0);
				STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno));
			}
			*A = buf;
		}
#endif
	}
	else
#endif
	if (_starpu_can_submit_scc_task())
	{
#ifdef STARPU_USE_SCC
		_starpu_scc_allocate_shared_memory(A, dim);
#endif
	}
#ifdef STARPU_HAVE_HWLOC
	if (starpu_memory_nodes_get_numa_count() > 1)
	{
		struct _starpu_machine_config *config = _starpu_get_machine_config();
		hwloc_topology_t hwtopology = config->topology.hwtopology;
		hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node));
		hwloc_bitmap_t nodeset = numa_node_obj->nodeset;
#if HWLOC_API_VERSION >= 0x00020000
		*A = hwloc_alloc_membind(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags | HWLOC_MEMBIND_BYNODESET);
#else
		*A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND | HWLOC_MEMBIND_NOCPUBIND, flags);
#endif
		//fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A);
		if (!*A)
			ret = -ENOMEM;
	}
#endif /* STARPU_HAVE_HWLOC */
	else
#ifdef STARPU_HAVE_POSIX_MEMALIGN
	if (_malloc_align != sizeof(void*))
	{
		if (posix_memalign(A, _malloc_align, dim))
		{
			ret = -ENOMEM;
			*A = NULL;
		}
	}
	else
#elif defined(STARPU_HAVE_MEMALIGN)
	if (_malloc_align != sizeof(void*))
	{
		*A = memalign(_malloc_align, dim);
		if (!*A)
			ret = -ENOMEM;
	}
	else
#endif /* STARPU_HAVE_POSIX_MEMALIGN */
	{
		*A = malloc(dim);
		if (!*A)
			ret = -ENOMEM;
	}

#if defined(STARPU_SIMGRID) || defined(STARPU_USE_CUDA)
end:
#endif
	if (ret == 0)
	{
		STARPU_ASSERT_MSG(*A, "Failed to allocate memory of size %lu b\n", (unsigned long)dim);
	}
	else if (flags & STARPU_MALLOC_COUNT)
	{
		starpu_memory_deallocate(dst_node, dim);
	}

	return ret;
}
int starpu_malloc(void **A, size_t dim)
{
	return starpu_malloc_flags(A, dim, STARPU_MALLOC_PINNED);
}
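
/* A minimal usage sketch (illustration only): allocate a pinned buffer that
 * is also accounted against the node's memory limit, then release it.
 * STARPU_MALLOC_COUNT makes the allocation go through the memory manager,
 * possibly reclaiming cached data when the node is full:
 *
 *	double *buf;
 *	if (starpu_malloc_flags((void**) &buf, size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT) != 0)
 *		return -ENOMEM;
 *	...
 *	starpu_free_flags(buf, size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT);
 *
 * `size` is a placeholder for the caller's buffer size. */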
#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
{
	cudaError_t cures;
	cures = cudaFreeHost(arg);
	if (STARPU_UNLIKELY(cures))
		STARPU_CUDA_REPORT_ERROR(cures);
}
#endif

//#ifdef STARPU_USE_OPENCL
//static void free_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
//{
//	// free(arg);
//	int err = clReleaseMemObject(arg);
//	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
//}
//#endif

#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) // || defined(STARPU_USE_OPENCL)
static struct starpu_perfmodel free_pinned_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "free_pinned"
};

static struct starpu_codelet free_pinned_cl =
{
	.cuda_funcs = {free_pinned_cuda_codelet},
//#ifdef STARPU_USE_OPENCL
//	.opencl_funcs = {free_pinned_opencl_codelet},
//#endif
	.nbuffers = 0,
	.model = &free_pinned_model
};
#endif
int starpu_free_flags(void *A, size_t dim, int flags)
{
	return _starpu_free_flags_on_node(STARPU_MAIN_RAM, A, dim, flags);
}

int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags)
{
	if (flags & STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0)
	{
		if (_starpu_can_submit_cuda_task())
		{
#ifdef STARPU_SIMGRID
			/* TODO: simulate CUDA barrier */
#else /* !STARPU_SIMGRID */
#ifdef STARPU_USE_CUDA
#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER
			if (!starpu_is_initialized())
			{
#endif
				/* This is especially useful when starpu_free is called from
				 * the GCC-plugin. starpu_shutdown will probably have already
				 * been called, so we will not be able to submit a task. */
				cudaError_t err = cudaFreeHost(A);
				if (STARPU_UNLIKELY(err))
					STARPU_CUDA_REPORT_ERROR(err);
				goto out;
#ifndef STARPU_HAVE_CUDA_MEMCPY_PEER
			}
			else
			{
				int push_res;

				STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned deallocation must not be done from task or callback");

				free_pinned_cl.where = STARPU_CUDA;
				struct starpu_task *task = starpu_task_create();
				task->name = "cuda_free_pinned";
				task->callback_func = NULL;
				task->cl = &free_pinned_cl;
				task->cl_arg = A;
				task->synchronous = 1;

				_starpu_exclude_task_from_dag(task);

				push_res = _starpu_task_submit_internally(task);
				STARPU_ASSERT(push_res != -ENODEV);
				goto out;
			}
#endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */
#endif /* STARPU_USE_CUDA */
#endif /* STARPU_SIMGRID */
		}
//		else if (_starpu_can_submit_opencl_task())
//		{
//#ifdef STARPU_USE_OPENCL
//			int push_res;
//
//			STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL deallocation must not be done from task or callback");
//
//			free_pinned_cl.where = STARPU_OPENCL;
//			struct starpu_task *task = starpu_task_create();
//			task->name = "opencl_free_pinned";
//			task->callback_func = NULL;
//			task->cl = &free_pinned_cl;
//			task->cl_arg = A;
//			task->synchronous = 1;
//
//			_starpu_exclude_task_from_dag(task);
//
//			push_res = starpu_task_submit(task);
//			STARPU_ASSERT(push_res != -ENODEV);
//			goto out;
//		}
//#endif
	}
#ifdef STARPU_SIMGRID
	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
	{
#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 15)
		SMPI_SHARED_FREE(A);
#else
		/* TODO: drop when simgrid 3.15 is widely used by the people who need this feature */
		munmap(A, dim);
#endif
	}
	else
#endif
	if (_starpu_can_submit_scc_task())
	{
#ifdef STARPU_USE_SCC
		_starpu_scc_free_shared_memory(A);
#endif
	}
#ifdef STARPU_HAVE_HWLOC
	else if (starpu_memory_nodes_get_numa_count() > 1)
	{
		struct _starpu_machine_config *config = _starpu_get_machine_config();
		hwloc_topology_t hwtopology = config->topology.hwtopology;
		hwloc_free(hwtopology, A, dim);
	}
#endif /* STARPU_HAVE_HWLOC */
	else
		free(A);

#if !defined(STARPU_SIMGRID) && defined(STARPU_USE_CUDA)
out:
#endif
	if (flags & STARPU_MALLOC_COUNT)
	{
		starpu_memory_deallocate(dst_node, dim);
	}

	return 0;
}

int starpu_free(void *A)
{
	return starpu_free_flags(A, 0, STARPU_MALLOC_PINNED);
}
#ifdef STARPU_SIMGRID
static starpu_pthread_mutex_t cuda_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
static starpu_pthread_mutex_t opencl_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
#endif

static uintptr_t
_starpu_malloc_on_node(unsigned dst_node, size_t size, int flags)
{
	uintptr_t addr = 0;

#if defined(STARPU_USE_CUDA) && !defined(STARPU_SIMGRID)
	cudaError_t status;
#endif
	/* Handle count first */
	if (flags & STARPU_MALLOC_COUNT)
	{
		if (starpu_memory_allocate(dst_node, size, flags) != 0)
			return 0;
		/* And prevent double-count in starpu_malloc_flags */
		flags &= ~STARPU_MALLOC_COUNT;
	}

	switch(starpu_node_get_kind(dst_node))
	{
		case STARPU_CPU_RAM:
		{
			_starpu_malloc_flags_on_node(dst_node, (void**) &addr, size,
#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
					/* without memcpy_peer, we cannot
					 * allocate pinned memory, since it
					 * requires waiting for a task, and we
					 * may be called with a spinlock held
					 */
					flags & ~STARPU_MALLOC_PINNED
#else
					flags
#endif
					);
			break;
		}
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
		case STARPU_CUDA_RAM:
		{
#ifdef STARPU_SIMGRID
			static uintptr_t last[STARPU_MAXNODES];
#ifdef STARPU_DEVEL
#warning TODO: record used memory, using a simgrid property to know the available memory
#endif
			/* Sleep for the allocation */
			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			if (_starpu_simgrid_cuda_malloc_cost())
				MSG_process_sleep(0.000175);
			if (!last[dst_node])
				last[dst_node] = 1<<10;
			addr = last[dst_node];
			last[dst_node] += size;
			STARPU_ASSERT(last[dst_node] >= addr);
			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
#else
			unsigned devid = _starpu_memory_node_get_devid(dst_node);
#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
			starpu_cuda_set_device(devid);
#else
			struct _starpu_worker *worker = _starpu_get_local_worker_key();
			if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid)
				STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA");
#endif
			status = cudaMalloc((void **)&addr, size);
			if (!addr || (status != cudaSuccess))
			{
				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
					STARPU_CUDA_REPORT_ERROR(status);
				addr = 0;
			}
#endif
			break;
		}
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
		case STARPU_OPENCL_RAM:
		{
#ifdef STARPU_SIMGRID
			static uintptr_t last[STARPU_MAXNODES];
			/* Sleep for the allocation */
			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			if (_starpu_simgrid_cuda_malloc_cost())
				MSG_process_sleep(0.000175);
			if (!last[dst_node])
				last[dst_node] = 1<<10;
			addr = last[dst_node];
			last[dst_node] += size;
			STARPU_ASSERT(last[dst_node] >= addr);
			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
#else
			int ret;
			cl_mem ptr;

			ret = starpu_opencl_allocate_memory(_starpu_memory_node_get_devid(dst_node), &ptr, size, CL_MEM_READ_WRITE);
			if (ret)
			{
				addr = 0;
			}
			else
			{
				addr = (uintptr_t)ptr;
			}
#endif
			break;
		}
#endif
		case STARPU_DISK_RAM:
		{
			addr = (uintptr_t) _starpu_disk_alloc(dst_node, size);
			break;
		}

#ifdef STARPU_USE_MIC
		case STARPU_MIC_RAM:
			if (_starpu_mic_allocate_memory((void **)(&addr), size, dst_node))
				addr = 0;
			break;
#endif
#ifdef STARPU_USE_MPI_MASTER_SLAVE
		case STARPU_MPI_MS_RAM:
			if (_starpu_mpi_src_allocate_memory((void **)(&addr), size, dst_node))
				addr = 0;
			break;
#endif
#ifdef STARPU_USE_SCC
		case STARPU_SCC_RAM:
			if (_starpu_scc_allocate_memory((void **)(&addr), size, dst_node))
				addr = 0;
			break;
#endif
		default:
			STARPU_ABORT();
	}

	if (addr == 0)
	{
		// Allocation failed, give the memory back to the memory manager
		_STARPU_TRACE_MEMORY_FULL(size);
		starpu_memory_deallocate(dst_node, size);
	}

	return addr;
}
void
_starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags)
{
	int count = flags & STARPU_MALLOC_COUNT;
	flags &= ~STARPU_MALLOC_COUNT;

	enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
	switch(kind)
	{
		case STARPU_CPU_RAM:
			_starpu_free_flags_on_node(dst_node, (void*)addr, size,
#if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
					flags & ~STARPU_MALLOC_PINNED
#else
					flags
#endif
					);
			break;
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
		case STARPU_CUDA_RAM:
		{
#ifdef STARPU_SIMGRID
			STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			/* Sleep for the free */
			if (_starpu_simgrid_cuda_malloc_cost())
				MSG_process_sleep(0.000750);
			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
			/* CUDA also synchronizes roughly everything on cudaFree */
			_starpu_simgrid_sync_gpus();
#else
			cudaError_t err;
			unsigned devid = _starpu_memory_node_get_devid(dst_node);
#if defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
			starpu_cuda_set_device(devid);
#else
			struct _starpu_worker *worker = _starpu_get_local_worker_key();
			if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid)
				STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA");
#endif
			err = cudaFree((void*)addr);
			if (STARPU_UNLIKELY(err != cudaSuccess
#ifdef STARPU_OPENMP
				/* When StarPU is used as an OpenMP runtime support,
				 * starpu_omp_shutdown() will usually be called from a
				 * destructor, in which case cudaThreadExit() reports a
				 * cudaErrorCudartUnloading here. There should not
				 * be any remaining tasks running at this point so
				 * we can probably ignore it without much consequence. */
				&& err != cudaErrorCudartUnloading
#endif /* STARPU_OPENMP */
				))
				STARPU_CUDA_REPORT_ERROR(err);
#endif
			break;
		}
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
		case STARPU_OPENCL_RAM:
		{
#ifdef STARPU_SIMGRID
			STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			/* Sleep for the free */
			if (_starpu_simgrid_cuda_malloc_cost())
				MSG_process_sleep(0.000750);
			STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
#else
			cl_int err;
			err = clReleaseMemObject((void*)addr);
			if (STARPU_UNLIKELY(err != CL_SUCCESS))
				STARPU_OPENCL_REPORT_ERROR(err);
#endif
			break;
		}
#endif
		case STARPU_DISK_RAM:
		{
			_starpu_disk_free(dst_node, (void *) addr, size);
			break;
		}

#ifdef STARPU_USE_MIC
		case STARPU_MIC_RAM:
			_starpu_mic_free_memory((void*) addr, size, dst_node);
			break;
#endif
#ifdef STARPU_USE_MPI_MASTER_SLAVE
		case STARPU_MPI_MS_RAM:
			_starpu_mpi_source_free_memory((void*) addr, dst_node);
			break;
#endif
#ifdef STARPU_USE_SCC
		case STARPU_SCC_RAM:
			_starpu_scc_free_memory((void *) addr, dst_node);
			break;
#endif
		default:
			STARPU_ABORT();
	}
	if (count)
		starpu_memory_deallocate(dst_node, size);
}
int
starpu_memory_pin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED)
{
	if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0)
	{
#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
		if (cudaHostRegister(addr, size, cudaHostRegisterPortable) != cudaSuccess)
			return -1;
#endif
	}
	return 0;
}

int
starpu_memory_unpin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED)
{
	if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0)
	{
#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
		if (cudaHostUnregister(addr) != cudaSuccess)
			return -1;
#endif
	}
	return 0;
}
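
/* A minimal usage sketch (illustration only): pin a buffer that was obtained
 * with plain malloc, so that CUDA transfers from/to it can be asynchronous,
 * then unpin it before freeing:
 *
 *	char *data = malloc(size);		// size is a placeholder
 *	starpu_memory_pin(data, size);
 *	...	// e.g. register as a StarPU handle, run tasks, unregister
 *	starpu_memory_unpin(data, size);
 *	free(data);
 */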
/*
 * On CUDA, device malloc is very expensive, so for small sizes we allocate
 * big chunks divided in blocks, and actually hand out segments of consecutive
 * blocks.
 *
 * We try to keep the list of chunks sorted by increasing occupancy, so we can
 * quickly find free segments to allocate.
 */

/* Size of each chunk: with 32MiB granularity, 128 chunks have to be allocated
 * in order to fill a 4GiB GPU. */
#define CHUNK_SIZE (32*1024*1024)

/* Maximum segment size we will allocate in chunks */
#define CHUNK_ALLOC_MAX (CHUNK_SIZE / 8)

/* Granularity of allocation, i.e. the block size; StarPU will never allocate
 * less than this.
 * 16KiB (i.e. a 64x64 tile of floats) granularity eats 2MiB of RAM for
 * managing a 4GiB GPU.
 */
#define CHUNK_ALLOC_MIN (16*1024)

/* Don't really deallocate chunks unless we have more than this many chunks
 * which are completely free. */
#define CHUNKS_NFREE 4

/* Number of blocks */
#define CHUNK_NBLOCKS (CHUNK_SIZE/CHUNK_ALLOC_MIN)
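
/* Worked numbers (derived from the constants above, for illustration):
 *	CHUNK_NBLOCKS   = 32MiB / 16KiB = 2048 blocks per chunk
 *	CHUNK_ALLOC_MAX = 32MiB / 8     = 4MiB, i.e. at most 256 blocks
 *	                                  per segment
 * Each chunk carries a bitmap of CHUNK_NBLOCKS+1 = 2049 struct block entries
 * (8 bytes each with 32-bit ints), i.e. ~16KiB of bookkeeping per 32MiB
 * chunk, hence the ~2MiB figure quoted above for a full 4GiB GPU
 * (128 chunks). */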
/* Linked list of available segments */
struct block
{
	int length;	/* Number of consecutive free blocks */
	int next;	/* Next free segment */
};

/* One chunk */
LIST_TYPE(_starpu_chunk,
	uintptr_t base;

	/* Available number of blocks, for debugging */
	int available;

	/* Overestimation of the maximum size of available segments in this chunk */
	int available_max;

	/* Bitmap describing availability of the blocks */
	/* Block 0 is always empty, and is just the head of the free segments list */
	struct block bitmap[CHUNK_NBLOCKS+1];
)

/* One list of chunks per node */
static struct _starpu_chunk_list chunks[STARPU_MAXNODES];
/* Number of completely free chunks */
static int nfreechunks[STARPU_MAXNODES];
/* This protects chunks and nfreechunks */
static starpu_pthread_mutex_t chunk_mutex[STARPU_MAXNODES];
void
_starpu_malloc_init(unsigned dst_node)
{
	_starpu_chunk_list_init(&chunks[dst_node]);
	nfreechunks[dst_node] = 0;
	STARPU_PTHREAD_MUTEX_INIT(&chunk_mutex[dst_node], NULL);
	disable_pinning = starpu_get_env_number("STARPU_DISABLE_PINNING");
	malloc_on_node_default_flags[dst_node] = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT;
#ifdef STARPU_SIMGRID
#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 15)
	/* Reasonably "costless" */
	_starpu_malloc_simulation_fold = starpu_get_env_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20;
#endif
#endif
}

void
_starpu_malloc_shutdown(unsigned dst_node)
{
	struct _starpu_chunk *chunk, *next_chunk;

	STARPU_PTHREAD_MUTEX_LOCK(&chunk_mutex[dst_node]);
	for (chunk = _starpu_chunk_list_begin(&chunks[dst_node]);
	     chunk != _starpu_chunk_list_end(&chunks[dst_node]);
	     chunk = next_chunk)
	{
		next_chunk = _starpu_chunk_list_next(chunk);
		_starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, malloc_on_node_default_flags[dst_node]);
		_starpu_chunk_list_erase(&chunks[dst_node], chunk);
		free(chunk);
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&chunk_mutex[dst_node]);
	STARPU_PTHREAD_MUTEX_DESTROY(&chunk_mutex[dst_node]);
}
/* Create a new chunk */
static struct _starpu_chunk *_starpu_new_chunk(unsigned dst_node, int flags)
{
	struct _starpu_chunk *chunk;
	uintptr_t base = _starpu_malloc_on_node(dst_node, CHUNK_SIZE, flags);

	if (!base)
		return NULL;

	/* Create a new chunk */
	chunk = _starpu_chunk_new();
	chunk->base = base;

	/* First block is just a fake block pointing to the free segments list */
	chunk->bitmap[0].length = 0;
	chunk->bitmap[0].next = 1;

	/* At first we have only one big segment for the whole chunk */
	chunk->bitmap[1].length = CHUNK_NBLOCKS;
	chunk->bitmap[1].next = -1;

	chunk->available_max = CHUNK_NBLOCKS;
	chunk->available = CHUNK_NBLOCKS;
	return chunk;
}
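
/* Illustration (not part of the original file) of the free-list encoding in
 * a fresh chunk, and after one 3-block allocation taken at block 1:
 *
 *	fresh:      bitmap[0] = {length 0, next 1}	(sentinel head)
 *	            bitmap[1] = {length 2048, next -1}
 *
 *	alloc(3):   bitmap[0] = {length 0, next 4}	(head now skips to 4)
 *	            bitmap[4] = {length 2045, next -1}
 *
 * The bitmap thus stores a singly-linked list of free segments, indexed by
 * block number and threaded through the array itself. */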
uintptr_t
starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags)
{
	/* Big allocation, allocate normally */
	if (size > CHUNK_ALLOC_MAX || starpu_node_get_kind(dst_node) != STARPU_CUDA_RAM)
		return _starpu_malloc_on_node(dst_node, size, flags);

	/* Round up the allocation to the block size */
	int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN;

	struct _starpu_chunk *chunk;
	int prevblock, block;
	int available_max;
	struct block *bitmap;

	STARPU_PTHREAD_MUTEX_LOCK(&chunk_mutex[dst_node]);

	/* Try to find a big enough segment among the chunks */
	for (chunk = _starpu_chunk_list_begin(&chunks[dst_node]);
	     chunk != _starpu_chunk_list_end(&chunks[dst_node]);
	     chunk = _starpu_chunk_list_next(chunk))
	{
		if (chunk->available_max < nblocks)
			continue;

		bitmap = chunk->bitmap;
		available_max = 0;
		for (prevblock = block = 0;
		     block != -1;
		     prevblock = block, block = bitmap[prevblock].next)
		{
			STARPU_ASSERT(block >= 0 && block <= CHUNK_NBLOCKS);
			int length = bitmap[block].length;
			if (length >= nblocks)
			{
				if (length >= 2*nblocks)
				{
					/* This one has quite some room,
					 * put it front, to make finding it
					 * easier next time. */
					_starpu_chunk_list_erase(&chunks[dst_node], chunk);
					_starpu_chunk_list_push_front(&chunks[dst_node], chunk);
				}
				if (chunk->available == CHUNK_NBLOCKS)
					/* This one was empty, it's not empty any more */
					nfreechunks[dst_node]--;
				goto found;
			}
			if (length > available_max)
				available_max = length;
		}

		/* Didn't find a big enough segment in this chunk, its
		 * available_max is out of date */
		chunk->available_max = available_max;
	}

	/* Didn't find a big enough segment, create another chunk. */
	chunk = _starpu_new_chunk(dst_node, flags);
	if (!chunk)
	{
		/* Really no memory any more, fail */
		STARPU_PTHREAD_MUTEX_UNLOCK(&chunk_mutex[dst_node]);
		errno = ENOMEM;
		return 0;
	}

	/* And make it easy to find. */
	_starpu_chunk_list_push_front(&chunks[dst_node], chunk);
	bitmap = chunk->bitmap;
	prevblock = 0;
	block = 1;

found:
	chunk->available -= nblocks;
	STARPU_ASSERT(bitmap[block].length >= nblocks);
	STARPU_ASSERT(block <= CHUNK_NBLOCKS);
	if (bitmap[block].length == nblocks)
	{
		/* Fits exactly, drop this segment from the skip list */
		bitmap[prevblock].next = bitmap[block].next;
	}
	else
	{
		/* Still some room */
		STARPU_ASSERT(block + nblocks <= CHUNK_NBLOCKS);
		bitmap[prevblock].next = block + nblocks;
		bitmap[block + nblocks].length = bitmap[block].length - nblocks;
		bitmap[block + nblocks].next = bitmap[block].next;
	}

	STARPU_PTHREAD_MUTEX_UNLOCK(&chunk_mutex[dst_node]);
	return chunk->base + (block-1) * CHUNK_ALLOC_MIN;
}
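
/* Example of the rounding above (illustration only): a request for 20000
 * bytes on a CUDA node gives
 *	nblocks = (20000 + 16384 - 1) / 16384 = 2,
 * so a segment of 2 consecutive blocks (32KiB) is carved out of a chunk, and
 * the returned address is
 *	chunk->base + (block-1) * CHUNK_ALLOC_MIN
 * (block 0 being the list head, block 1 maps to offset 0). */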
void
starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags)
{
	/* Big allocation, deallocate normally */
	if (size > CHUNK_ALLOC_MAX || starpu_node_get_kind(dst_node) != STARPU_CUDA_RAM)
	{
		_starpu_free_on_node_flags(dst_node, addr, size, flags);
		return;
	}

	struct _starpu_chunk *chunk;

	/* Round up the allocation to the block size */
	int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN;

	STARPU_PTHREAD_MUTEX_LOCK(&chunk_mutex[dst_node]);
	for (chunk = _starpu_chunk_list_begin(&chunks[dst_node]);
	     chunk != _starpu_chunk_list_end(&chunks[dst_node]);
	     chunk = _starpu_chunk_list_next(chunk))
		if (addr >= chunk->base && addr < chunk->base + CHUNK_SIZE)
			break;
	STARPU_ASSERT(chunk != _starpu_chunk_list_end(&chunks[dst_node]));

	struct block *bitmap = chunk->bitmap;
	int block = ((addr - chunk->base) / CHUNK_ALLOC_MIN) + 1, prevblock, nextblock;

	/* Look for the free segment just before this one */
	for (prevblock = 0;
	     prevblock != -1;
	     prevblock = nextblock)
	{
		STARPU_ASSERT(prevblock >= 0 && prevblock <= CHUNK_NBLOCKS);
		nextblock = bitmap[prevblock].next;
		STARPU_ASSERT_MSG(nextblock != block, "It seems data 0x%lx (size %u) on node %u is being freed a second time\n", (unsigned long) addr, (unsigned) size, dst_node);
		if (nextblock > block || nextblock == -1)
			break;
	}
	STARPU_ASSERT(prevblock != -1);

	chunk->available += nblocks;

	/* Insert in the free segments list */
	bitmap[block].next = nextblock;
	bitmap[prevblock].next = block;
	bitmap[block].length = nblocks;

	STARPU_ASSERT(nextblock >= -1 && nextblock <= CHUNK_NBLOCKS);
	if (nextblock == block + nblocks)
	{
		/* This freed segment is just before a free segment, merge them */
		bitmap[block].next = bitmap[nextblock].next;
		bitmap[block].length += bitmap[nextblock].length;

		if (bitmap[block].length > chunk->available_max)
			chunk->available_max = bitmap[block].length;
	}

	if (prevblock > 0 && prevblock + bitmap[prevblock].length == block)
	{
		/* This freed segment is just after a free segment, merge them */
		bitmap[prevblock].next = bitmap[block].next;
		bitmap[prevblock].length += bitmap[block].length;

		if (bitmap[prevblock].length > chunk->available_max)
			chunk->available_max = bitmap[prevblock].length;

		block = prevblock;
	}

	if (chunk->available == CHUNK_NBLOCKS)
	{
		/* This chunk is now empty, but avoid chunk free/alloc
		 * ping-pong by keeping some of these. */
		if (nfreechunks[dst_node] >= CHUNKS_NFREE)
		{
			/* We already have enough free chunks, release this one */
			_starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, flags);
			_starpu_chunk_list_erase(&chunks[dst_node], chunk);
			free(chunk);
		}
		else
			nfreechunks[dst_node]++;
	}
	else
	{
		/* Freed some room, put this chunk first in the chunks list */
		_starpu_chunk_list_erase(&chunks[dst_node], chunk);
		_starpu_chunk_list_push_front(&chunks[dst_node], chunk);
	}

	STARPU_PTHREAD_MUTEX_UNLOCK(&chunk_mutex[dst_node]);
}
void starpu_malloc_on_node_set_default_flags(unsigned node, int flags)
{
	STARPU_ASSERT_MSG(node < STARPU_MAXNODES, "bogus node value %u given to starpu_malloc_on_node_set_default_flags\n", node);
	malloc_on_node_default_flags[node] = flags;
}

uintptr_t
starpu_malloc_on_node(unsigned dst_node, size_t size)
{
	return starpu_malloc_on_node_flags(dst_node, size, malloc_on_node_default_flags[dst_node]);
}

void
starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
{
	starpu_free_on_node_flags(dst_node, addr, size, malloc_on_node_default_flags[dst_node]);
}