memalloc.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2015 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <datawizard/memory_manager.h>
  18. #include <datawizard/memalloc.h>
  19. #include <datawizard/footprint.h>
  20. #include <core/disk.h>
  21. #include <starpu.h>
  22. #include <common/uthash.h>
/* This per-node RW-locks protect mc_list and memchunk_cache entries */
/* Note: handle header lock is always taken before this */
static struct _starpu_spinlock mc_lock[STARPU_MAXNODES];

/* Potentially in use memory chunks */
static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];

/* Explicitly caches memory chunks that can be reused */
struct mc_cache_entry
{
	UT_hash_handle hh;                    /* uthash handle: makes this entry hashable */
	struct _starpu_mem_chunk_list *list;  /* cached chunks sharing this footprint */
	uint32_t footprint;                   /* hash key: data footprint */
};
/* Per-node footprint-indexed hash of cached (reusable) chunks */
static struct mc_cache_entry *mc_cache[STARPU_MAXNODES];
/* Number of chunks currently sitting in the cache, per node */
static int mc_cache_nb[STARPU_MAXNODES];
/* Total size in bytes of the cached chunks, per node */
static starpu_ssize_t mc_cache_size[STARPU_MAXNODES];

/* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
const unsigned starpu_memstrategy_data_size_coefficient=2;

/* Forward declarations (defined later in this file) */
static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
static unsigned choose_target(starpu_data_handle_t handle, unsigned node);
  42. void _starpu_init_mem_chunk_lists(void)
  43. {
  44. unsigned i;
  45. for (i = 0; i < STARPU_MAXNODES; i++)
  46. {
  47. _starpu_spin_init(&mc_lock[i]);
  48. mc_list[i] = _starpu_mem_chunk_list_new();
  49. STARPU_HG_DISABLE_CHECKING(mc_cache_size[i]);
  50. }
  51. }
  52. void _starpu_deinit_mem_chunk_lists(void)
  53. {
  54. unsigned i;
  55. for (i = 0; i < STARPU_MAXNODES; i++)
  56. {
  57. struct mc_cache_entry *entry, *tmp;
  58. _starpu_mem_chunk_list_delete(mc_list[i]);
  59. HASH_ITER(hh, mc_cache[i], entry, tmp)
  60. {
  61. HASH_DEL(mc_cache[i], entry);
  62. _starpu_mem_chunk_list_delete(entry->list);
  63. free(entry);
  64. }
  65. STARPU_ASSERT(mc_cache_nb[i] == 0);
  66. STARPU_ASSERT(mc_cache_size[i] == 0);
  67. _starpu_spin_destroy(&mc_lock[i]);
  68. }
  69. }
  70. /*
  71. * Manipulate subtrees
  72. */
  73. static void unlock_all_subtree(starpu_data_handle_t handle)
  74. {
  75. /* lock all sub-subtrees children
  76. * Note that this is done in the reverse order of the
  77. * lock_all_subtree so that we avoid deadlock */
  78. unsigned i;
  79. for (i =0; i < handle->nchildren; i++)
  80. {
  81. unsigned child = handle->nchildren - 1 - i;
  82. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  83. unlock_all_subtree(child_handle);
  84. }
  85. _starpu_spin_unlock(&handle->header_lock);
  86. }
  87. static int lock_all_subtree(starpu_data_handle_t handle)
  88. {
  89. int child;
  90. /* lock parent */
  91. if (_starpu_spin_trylock(&handle->header_lock))
  92. /* the handle is busy, abort */
  93. return 0;
  94. /* lock all sub-subtrees children */
  95. for (child = 0; child < (int) handle->nchildren; child++)
  96. {
  97. if (!lock_all_subtree(starpu_data_get_child(handle, child))) {
  98. /* Some child is busy, abort */
  99. while (--child >= 0)
  100. /* Unlock what we have already uselessly locked */
  101. unlock_all_subtree(starpu_data_get_child(handle, child));
  102. return 0;
  103. }
  104. }
  105. return 1;
  106. }
  107. static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
  108. {
  109. /* we only free if no one refers to the leaf */
  110. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  111. if (refcnt)
  112. return 0;
  113. if (!handle->nchildren)
  114. return 1;
  115. /* look into all sub-subtrees children */
  116. unsigned child;
  117. for (child = 0; child < handle->nchildren; child++)
  118. {
  119. unsigned res;
  120. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  121. res = may_free_subtree(child_handle, node);
  122. if (!res) return 0;
  123. }
  124. /* no problem was found */
  125. return 1;
  126. }
/* Move the valid contents of the subtree rooted at @handle away from
 * @src_node so its buffers may be freed there.  For a leaf this updates the
 * MSI-like replicate states (and performs a copy when @src_node holds the
 * only valid copy); for a partitioned handle it recurses into every child.
 * Must be called with the whole subtree locked (see lock_all_subtree). */
static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
				     unsigned dst_node)
{
	unsigned i;
	unsigned last = 0;
	unsigned cnt;
	int ret;

	STARPU_ASSERT(dst_node != src_node);

	if (handle->nchildren == 0)
	{
		struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
		struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];

		/* this is a leaf */
		switch(src_replicate->state)
		{
		case STARPU_OWNER:
			/* the local node has the only copy */
			/* the owner is now the destination_node */
			src_replicate->state = STARPU_INVALID;
			dst_replicate->state = STARPU_OWNER;

#ifdef STARPU_DEVEL
#warning we should use requests during memory reclaim
#endif
			/* TODO use request !! */
			/* Take temporary references on the replicates so they
			 * cannot disappear while the copy is in flight */
			_starpu_spin_checklocked(&handle->header_lock);
			src_replicate->refcnt++;
			dst_replicate->refcnt++;
			handle->busy_count+=2;

			/* Synchronous copy of the only valid replicate */
			ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
			STARPU_ASSERT(ret == 0);

			/* Drop the temporary references again */
			src_replicate->refcnt--;
			dst_replicate->refcnt--;
			STARPU_ASSERT(handle->busy_count >= 2);
			handle->busy_count -= 2;
			/* The handle must not get destroyed here: the caller
			 * still holds its header lock */
			ret = _starpu_data_check_not_busy(handle);
			STARPU_ASSERT(ret == 0);
			break;
		case STARPU_SHARED:
			/* some other node may have the copy */
			src_replicate->state = STARPU_INVALID;

			/* count the number of copies */
			cnt = 0;
			for (i = 0; i < STARPU_MAXNODES; i++)
			{
				if (handle->per_node[i].state == STARPU_SHARED)
				{
					cnt++;
					last = i;
				}
			}
			STARPU_ASSERT(cnt > 0);

			/* If a single copy remains, promote it to sole owner */
			if (cnt == 1)
				handle->per_node[last].state = STARPU_OWNER;
			break;
		case STARPU_INVALID:
			/* nothing to be done */
			break;
		default:
			STARPU_ABORT();
			break;
		}
	}
	else
	{
		/* Partitioned data: recurse into all children */
		unsigned child;
		for (child = 0; child < handle->nchildren; child++)
		{
			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
			transfer_subtree_to_node(child_handle, src_node, dst_node);
		}
	}
}
  201. static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
  202. {
  203. unsigned child;
  204. replicate->allocated = 0;
  205. /* XXX why do we need that ? */
  206. replicate->automatically_allocated = 0;
  207. for (child = 0; child < handle->nchildren; child++)
  208. {
  209. /* Notify children that their buffer has been deallocated too */
  210. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  211. notify_handle_children(child_handle, &child_handle->per_node[node], node);
  212. }
  213. }
/* Actually release the buffer held by @mc on @node, provided StarPU
 * allocated it itself and (when the chunk is still tied to a handle) the
 * replicate is no longer referenced.  Returns the number of bytes freed,
 * 0 when nothing could be released.  If the chunk still has a handle, the
 * caller must hold that handle's header lock. */
static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	STARPU_ASSERT(mc->ops);
	STARPU_ASSERT(mc->ops->free_data_on_node);

	starpu_data_handle_t handle = mc->data;
	struct _starpu_data_replicate *replicate = mc->replicate;

	if (handle)
		_starpu_spin_checklocked(&handle->header_lock);

	/* Only free StarPU-allocated buffers, and only when nobody still
	 * references the replicate */
	if (mc->automatically_allocated &&
		(!handle || replicate->refcnt == 0))
	{
		void *data_interface;

		if (handle)
			STARPU_ASSERT(replicate->allocated);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interface, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the free method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
		}
#endif

		/* A live handle owns its data interface; a detached chunk
		 * keeps its own private copy instead */
		if (handle)
			data_interface = replicate->data_interface;
		else
			data_interface = mc->chunk_interface;
		STARPU_ASSERT(data_interface);

		_STARPU_TRACE_START_FREE(node, mc->size);
		mc->ops->free_data_on_node(data_interface, node);
		_STARPU_TRACE_END_FREE(node);

		if (handle)
			notify_handle_children(handle, replicate, node);

		freed = mc->size;

		if (handle)
			STARPU_ASSERT(replicate->refcnt == 0);
	}

	return freed;
}
  255. static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  256. {
  257. size_t size;
  258. starpu_data_handle_t handle = mc->data;
  259. if (handle) {
  260. _starpu_spin_checklocked(&handle->header_lock);
  261. mc->size = _starpu_data_get_size(handle);
  262. }
  263. if (mc->replicate)
  264. mc->replicate->mc=NULL;
  265. /* free the actual buffer */
  266. size = free_memory_on_node(mc, node);
  267. /* remove the mem_chunk from the list */
  268. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  269. _starpu_mem_chunk_delete(mc);
  270. return size;
  271. }
/* This function is called for memory chunks that are possibly in used (ie. not
 * in the cache). They should therefore still be associated to a handle.
 * Returns the number of bytes freed, 0 when the chunk could not be dropped
 * (write-through data, home node data, or a busy handle/subtree). */
static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	starpu_data_handle_t handle;
	handle = mc->data;
	STARPU_ASSERT(handle);

	/* This data should be written through to this node, avoid dropping it! */
	if (handle->wt_mask & (1<<node))
		return 0;

	/* This data was registered from this node, we will not be able to drop it anyway */
	if ((int) node == handle->home_node)
		return 0;

	/* REDUX memchunk */
	if (mc->relaxed_coherency == 2)
	{
		/* TODO: reduce it back to e.g. main memory */
	}
	else
	/* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
	 * memchunk that could be used with filters. */
	if (mc->relaxed_coherency == 1)
	{
		STARPU_ASSERT(mc->replicate);

		if (_starpu_spin_trylock(&handle->header_lock))
			/* Handle is busy, abort */
			return 0;

		if (mc->replicate->refcnt == 0)
		{
			/* Note that there is no need to transfer any data or
			 * to update the status in terms of MSI protocol
			 * because this memchunk is associated to a replicate
			 * in "relaxed coherency" mode. */
			freed = do_free_mem_chunk(mc, node);
		}

		_starpu_spin_unlock(&handle->header_lock);
	}
	/* try to lock all the subtree */
	else if (lock_all_subtree(handle))
	{
		/* check if they are all "free" */
		if (may_free_subtree(handle, node))
		{
			int target = -1;

			/* XXX Considering only owner to invalidate */

			STARPU_ASSERT(handle->per_node[node].refcnt == 0);

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */

			/* choose the best target */
			target = choose_target(handle, node);

			if (target != -1) {
#ifdef STARPU_MEMORY_STATS
				if (handle->per_node[node].state == STARPU_OWNER)
					_starpu_memory_handle_stats_invalidated(handle, node);
#endif
				_STARPU_TRACE_START_WRITEBACK(node);
				/* Write the data back to the target first so
				 * no valid copy is lost */
				transfer_subtree_to_node(handle, node, target);
				_STARPU_TRACE_END_WRITEBACK(node);
#ifdef STARPU_MEMORY_STATS
				_starpu_memory_handle_stats_loaded_owner(handle, target);
#endif
				STARPU_ASSERT(handle->per_node[node].refcnt == 0);

				/* now the actual buffer may be freed */
				freed = do_free_mem_chunk(mc, node);
			}
		}

		/* unlock the tree */
		unlock_all_subtree(handle);
	}
	return freed;
}
  344. #ifdef STARPU_USE_ALLOCATION_CACHE
/* We assume that mc_lock[node] is taken. is_already_in_mc_list indicates
 * that the mc is already in the list of buffers that are possibly used, and
 * therefore not in the cache.
 * Give the buffer described by @mc to @new_replicate: the underlying
 * allocation is kept, only its interface description changes hands. */
static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
{
	void *data_interface;

	/* we found an appropriate mem chunk: so we get it out
	 * of the "to free" list, and reassign it to the new
	 * piece of data */

	struct _starpu_data_replicate *old_replicate = mc->replicate;
	if (old_replicate)
	{
		/* Detach the old replicate: it loses its buffer */
		old_replicate->allocated = 0;
		old_replicate->automatically_allocated = 0;
		old_replicate->initialized = 0;
		data_interface = old_replicate->data_interface;
	}
	else
		/* Detached chunk: it kept its own copy of the interface */
		data_interface = mc->chunk_interface;

	STARPU_ASSERT(new_replicate->data_interface);
	STARPU_ASSERT(data_interface);
	/* Hand the buffer description over to the new replicate */
	memcpy(new_replicate->data_interface, data_interface, mc->size_interface);

	if (!old_replicate)
	{
		/* Free the copy that we made */
		free(mc->chunk_interface);
		mc->chunk_interface = NULL;
	}

	/* XXX: We do not actually reuse the mc at the moment, only the interface */

	/* mc->data = new_replicate->handle; */
	/* mc->footprint, mc->ops, mc->size_interface, mc->automatically_allocated should be
	 * unchanged ! */

	/* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */
	if (is_already_in_mc_list)
	{
		_starpu_mem_chunk_list_erase(mc_list[node], mc);
	}

	free(mc);
}
/* Try to hand the buffer of @mc over to @replicate.  The whole subtree of
 * the chunk's current handle must be idle; its contents are written back to
 * node 0 before the buffer is reassigned.  Returns 1 on success, 0 when the
 * subtree was busy. */
static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
{
	unsigned success = 0;

	starpu_data_handle_t old_data;
	old_data = mc->data;
	STARPU_ASSERT(old_data);

	/* try to lock all the subtree */
	/* and check if they are all "free" */
	if (lock_all_subtree(old_data))
	{
		if (may_free_subtree(old_data, node))
		{
			success = 1;

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */
			_STARPU_TRACE_START_WRITEBACK(node);
			transfer_subtree_to_node(old_data, node, 0);
			_STARPU_TRACE_END_WRITEBACK(node);

			/* now replace the previous data */
			reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
		}

		/* unlock the tree */
		unlock_all_subtree(old_data);
	}
	return success;
}
  410. static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
  411. void *data_interface_b, struct starpu_data_interface_ops *ops_b)
  412. {
  413. if (ops_a->interfaceid != ops_b->interfaceid)
  414. return -1;
  415. int ret = ops_a->compare(data_interface_a, data_interface_b);
  416. return ret;
  417. }
/* This function must be called with mc_lock[node] taken.
 * Look up the cache for a chunk whose interface matches what @handle needs
 * on @node; on a hit the chunk is removed from the cache (accounting
 * updated) and returned, otherwise NULL. */
static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
{
	/* go through all buffers in the cache */
	struct mc_cache_entry *entry;

	/* Cache buckets are indexed by data footprint */
	HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
	if (!entry)
		/* No data with that footprint */
		return NULL;

	struct _starpu_mem_chunk *mc;
	for (mc = _starpu_mem_chunk_list_begin(entry->list);
	     mc != _starpu_mem_chunk_list_end(entry->list);
	     mc = _starpu_mem_chunk_list_next(mc))
	{
		/* Is that a false hit ? (this is _very_ unlikely) */
		if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) != 1)
			continue;

		/* Cache hit */

		/* Remove from the cache */
		_starpu_mem_chunk_list_erase(entry->list, mc);
		/* Keep the cache accounting consistent */
		mc_cache_nb[node]--;
		STARPU_ASSERT(mc_cache_nb[node] >= 0);
		mc_cache_size[node] -= mc->size;
		STARPU_ASSERT(mc_cache_size[node] >= 0);
		return mc;
	}

	/* This is a cache miss */
	return NULL;
}
/* this function looks for a memory chunk that matches a given footprint in the
 * list of mem chunk that need to be freed. This function must be called with
 * mc_lock[node] taken.  Returns 1 when @replicate was given a reused
 * buffer, 0 otherwise. */
static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
{
	struct _starpu_mem_chunk *mc, *next_mc;

	/* go through all buffers in the cache */
	mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint);
	if (mc)
	{
		/* We found an entry in the cache so we can reuse it */
		reuse_mem_chunk(node, replicate, mc, 0);
		return 1;
	}

	if (!_starpu_has_not_important_data)
		return 0;

	/* now look for some non essential data in the active list */
	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* there is a risk that the memory chunk is freed before next
		 * iteration starts: so we compute the next element of the list
		 * now */
		next_mc = _starpu_mem_chunk_list_next(mc);

		/* Only steal buffers whose data was flagged as unimportant */
		if (mc->data->is_not_important && (mc->footprint == footprint))
		{
			// fprintf(stderr, "found a candidate ...\n");
			if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
				return 1;
		}
	}

	return 0;
}
  481. #endif
/*
 * Free the memory chuncks that are explicitely tagged to be freed.
 * This function takes mc_lock[node] itself; the caller must NOT hold it.
 * Stops early once @reclaim bytes have been freed (when @reclaim != 0).
 * Returns the number of bytes freed.
 */
static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
{
	struct _starpu_mem_chunk *mc;
	struct _starpu_mem_chunk_list *busy_mc_cache;
	struct mc_cache_entry *entry, *tmp;

	size_t freed = 0;

	_starpu_spin_lock(&mc_lock[node]);
	HASH_ITER(hh, mc_cache[node], entry, tmp)
	{
		/* Temporary holding list for chunks whose handle is busy */
		busy_mc_cache = _starpu_mem_chunk_list_new();

		while (!_starpu_mem_chunk_list_empty(entry->list)) {
			mc = _starpu_mem_chunk_list_pop_front(entry->list);
			starpu_data_handle_t handle = mc->data;

			if (handle)
				if (_starpu_spin_trylock(&handle->header_lock)) {
					/* The handle is still busy, leave this chunk for later */
					_starpu_mem_chunk_list_push_back(busy_mc_cache, mc);
					continue;
				}

			/* Update the cache accounting before freeing */
			mc_cache_nb[node]--;
			STARPU_ASSERT(mc_cache_nb[node] >= 0);
			mc_cache_size[node] -= mc->size;
			STARPU_ASSERT(mc_cache_size[node] >= 0);
			freed += free_memory_on_node(mc, node);
			if (handle)
				_starpu_spin_unlock(&handle->header_lock);

			/* Detached chunks own a copy of their interface */
			free(mc->chunk_interface);
			_starpu_mem_chunk_delete(mc);
			if (reclaim && freed >= reclaim)
				break;
		}

		/* Put the busy chunks back into this bucket */
		_starpu_mem_chunk_list_push_list_front(busy_mc_cache, entry->list);
		_starpu_mem_chunk_list_delete(busy_mc_cache);

		if (reclaim && freed >= reclaim)
			break;
	}
	_starpu_spin_unlock(&mc_lock[node]);
	return freed;
}
/*
 * Try to free the buffers currently in use on the memory node. If the force
 * flag is set, the memory is freed regardless of coherency concerns (this
 * should only be used at the termination of StarPU for instance).
 * This function takes mc_lock[node] itself; the caller must NOT hold it.
 * Returns the number of bytes freed.
 */
static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
{
	size_t freed = 0;

	struct _starpu_mem_chunk *mc, *next_mc;

	/*
	 * We have to unlock mc_lock before locking header_lock, so we have
	 * to be careful with the list. We try to do just one pass, by
	 * remembering the next mc to be tried. If it gets dropped, we restart
	 * from zero. So we continue until we go through the whole list without
	 * finding anything to free.
	 */

restart:
	_starpu_spin_lock(&mc_lock[node]);
	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* mc hopefully gets out of the list, we thus need to prefetch
		 * the next element */
		next_mc = _starpu_mem_chunk_list_next(mc);

		if (!force)
		{
			/* Best effort: only drop idle chunks, coherently */
			freed += try_to_free_mem_chunk(mc, node);

			if (reclaim && freed >= reclaim)
				break;
		}
		else
		{
			starpu_data_handle_t handle = mc->data;

			if (_starpu_spin_trylock(&handle->header_lock))
			{
				/* Ergl. We are shutting down, but somebody is
				 * still locking the handle. That's not
				 * supposed to happen, but better be safe by
				 * letting it go through. */
				_starpu_spin_unlock(&mc_lock[node]);
				goto restart;
			}

			/* We must free the memory now, because we are
			 * terminating the drivers: note that data coherency is
			 * not maintained in that case ! */
			freed += do_free_mem_chunk(mc, node);
			_starpu_spin_unlock(&handle->header_lock);
		}
	}
	_starpu_spin_unlock(&mc_lock[node]);

	return freed;
}
  579. size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
  580. {
  581. size_t freed = 0;
  582. if (reclaim && !force)
  583. {
  584. static int warned;
  585. if (!warned) {
  586. char name[32];
  587. _starpu_memory_node_get_name(node, name, sizeof(name));
  588. _STARPU_DISP("Not enough memory left on node %s. Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges\n", name, (unsigned long) (reclaim / 1048576));
  589. warned = 1;
  590. }
  591. }
  592. /* remove all buffers for which there was a removal request */
  593. freed += flush_memchunk_cache(node, reclaim);
  594. /* try to free all allocated data potentially in use */
  595. if (reclaim && freed<reclaim)
  596. freed += free_potentially_in_use_mc(node, force, reclaim);
  597. return freed;
  598. }
  599. /*
  600. * This function frees all the memory that was implicitely allocated by StarPU
  601. * (for the data replicates). This is not ensuring data coherency, and should
  602. * only be called while StarPU is getting shut down.
  603. */
  604. size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
  605. {
  606. return _starpu_memory_reclaim_generic(node, 1, 0);
  607. }
  608. /* Periodic tidy of available memory */
  609. void starpu_memchunk_tidy(unsigned node)
  610. {
  611. starpu_ssize_t total = starpu_memory_get_total(node);
  612. starpu_ssize_t available = starpu_memory_get_available(node);
  613. size_t target, amount;
  614. unsigned minimum_p = starpu_get_env_number_default("STARPU_MINIMUM_AVAILABLE_MEM", 5);
  615. unsigned target_p = starpu_get_env_number_default("STARPU_TARGET_AVAILABLE_MEM", 10);
  616. if (total <= 0)
  617. return;
  618. /* TODO: only request writebacks to get buffers clean, without waiting
  619. * for it */
  620. /* Count cached allocation as being available */
  621. available += mc_cache_size[node];
  622. if (available >= (total * minimum_p) / 100)
  623. /* Enough available space, do not trigger reclaiming */
  624. return;
  625. /* Not enough available space, reclaim until we reach the target. */
  626. target = (total * target_p) / 100;
  627. amount = target - available;
  628. static int warned;
  629. if (!warned) {
  630. char name[32];
  631. _starpu_memory_node_get_name(node, name, sizeof(name));
  632. _STARPU_DISP("Low memory left on node %s (%luMiB over %luMiB). Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. The thresholds can be tuned using the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment variables.\n", name, (unsigned long) (available / 1048576), (unsigned long) (total / 1048576), (unsigned long) (amount / 1048576));
  633. warned = 1;
  634. }
  635. free_potentially_in_use_mc(node, 0, amount);
  636. }
  637. static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
  638. {
  639. struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
  640. starpu_data_handle_t handle = replicate->handle;
  641. STARPU_ASSERT(handle);
  642. STARPU_ASSERT(handle->ops);
  643. mc->data = handle;
  644. mc->footprint = _starpu_compute_data_footprint(handle);
  645. mc->ops = handle->ops;
  646. mc->automatically_allocated = automatically_allocated;
  647. mc->relaxed_coherency = replicate->relaxed_coherency;
  648. mc->replicate = replicate;
  649. mc->replicate->mc = mc;
  650. mc->chunk_interface = NULL;
  651. mc->size_interface = interface_size;
  652. return mc;
  653. }
  654. static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
  655. {
  656. unsigned dst_node = replicate->memory_node;
  657. struct _starpu_mem_chunk *mc;
  658. /* the interface was already filled by ops->allocate_data_on_node */
  659. size_t interface_size = replicate->handle->ops->interface_size;
  660. /* Put this memchunk in the list of memchunk in use */
  661. mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);
  662. _starpu_spin_lock(&mc_lock[dst_node]);
  663. _starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
  664. _starpu_spin_unlock(&mc_lock[dst_node]);
  665. }
/* This function is called when the handle is destroyed (e.g. when calling
 * unregister or unpartition). It puts all the memchunks that refer to the
 * specified handle into the cache.
 */
void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
{
	struct _starpu_mem_chunk *mc = replicate->mc;

	/* The replicate's memchunk must still point back at this handle. */
	STARPU_ASSERT(mc->data == handle);

	/* Record the allocated size, so that later in memory
	 * reclaiming we can estimate how much memory we free
	 * by freeing this. */
	mc->size = size;

	/* Also keep the interface parameters and pointers, for later reuse
	 * while detached, or freed.
	 * NOTE(review): malloc result is used unchecked — assumes allocation
	 * cannot fail here, consistent with the rest of this file; confirm
	 * the project's OOM policy. */
	mc->chunk_interface = malloc(mc->size_interface);
	memcpy(mc->chunk_interface, replicate->data_interface, mc->size_interface);

	/* This memchunk doesn't have to do with the data any more. */
	replicate->mc = NULL;
	mc->replicate = NULL;
	replicate->allocated = 0;
	replicate->automatically_allocated = 0;
	replicate->initialized = 0;

	/* Detach the chunk from the handle and from the per-node LRU list,
	 * under the node's chunk lock. */
	_starpu_spin_lock(&mc_lock[node]);
	mc->data = NULL;
	/* remove it from the main list */
	_starpu_mem_chunk_list_erase(mc_list[node], mc);
	_starpu_spin_unlock(&mc_lock[node]);

	/*
	 * Unless the user has provided a main RAM limitation, we would fill
	 * memory with cached data and then eventually swap.
	 */
	/*
	 * This is particularly important when
	 * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
	 * wouldn't even re-use these allocations!
	 */
	if (starpu_node_get_kind(node) == STARPU_CPU_RAM && starpu_get_env_number("STARPU_LIMIT_CPU_MEM") < 0)
	{
		/* Free data immediately */
		free_memory_on_node(mc, node);
		free(mc->chunk_interface);
		_starpu_mem_chunk_delete(mc);
	}
	else
	{
		/* put it in the list of buffers to be removed */
		uint32_t footprint = mc->footprint;
		struct mc_cache_entry *entry;
		_starpu_spin_lock(&mc_lock[node]);
		/* Find (or lazily create) the cache bucket for this footprint. */
		HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
		if (!entry)
		{
			entry = malloc(sizeof(*entry));
			entry->list = _starpu_mem_chunk_list_new();
			entry->footprint = footprint;
			HASH_ADD(hh, mc_cache[node], footprint, sizeof(entry->footprint), entry);
		}
		/* Account the chunk in the cache counters before publishing it. */
		mc_cache_nb[node]++;
		mc_cache_size[node] += mc->size;
		_starpu_mem_chunk_list_push_front(entry->list, mc);
		_starpu_spin_unlock(&mc_lock[node]);
	}
}
/*
 * In order to allocate a piece of data, we try to reuse existing buffers if
 * it's possible.
 * 1 - we try to reuse a memchunk that is explicitly unused.
 * 2 - we go through the list of memory chunks and find one that is not
 * referenced and that has the same footprint to reuse it.
 * 3 - we call the usual driver's alloc method
 * 4 - we go through the list of memory chunks and release those that are
 * not referenced (or part of those).
 *
 */
static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
{
	unsigned attempts = 0;
	starpu_ssize_t allocated_memory;
	int ret;
	starpu_ssize_t data_size = _starpu_data_get_size(handle);

	/* The caller must hold the handle's header lock. */
	_starpu_spin_checklocked(&handle->header_lock);

	_starpu_data_allocation_inc_stats(dst_node);

#ifdef STARPU_USE_ALLOCATION_CACHE
	/* perhaps we can directly reuse a buffer in the free-list */
	uint32_t footprint = _starpu_compute_data_footprint(handle);

	_STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size);
	_starpu_spin_lock(&mc_lock[dst_node]);
	if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
	{
		/* Cache hit: the replicate was served from a cached chunk. */
		_starpu_spin_unlock(&mc_lock[dst_node]);
		_starpu_allocation_cache_hit(dst_node);
		return data_size;
	}
	_starpu_spin_unlock(&mc_lock[dst_node]);
	_STARPU_TRACE_END_ALLOC_REUSE(dst_node);
#endif
	STARPU_ASSERT(handle->ops);
	STARPU_ASSERT(handle->ops->allocate_data_on_node);
	STARPU_ASSERT(replicate->data_interface);

	/* Work on a stack copy of the interface, so that the replicate's
	 * interface is left untouched unless the allocation succeeds. */
	char data_interface[handle->ops->interface_size];
	memcpy(data_interface, replicate->data_interface, handle->ops->interface_size);

	/* Take temporary reference on the replicate */
	replicate->refcnt++;
	handle->busy_count++;
	/* Drop the header lock during the (potentially long) allocation and
	 * memory-reclaim attempts below. */
	_starpu_spin_unlock(&handle->header_lock);

	do
	{
		_STARPU_TRACE_START_ALLOC(dst_node, data_size);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interface, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the malloc method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
		}
#endif

		allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node);
		_STARPU_TRACE_END_ALLOC(dst_node);

		if (allocated_memory == -ENOMEM)
		{
			/* Out of memory: reclaim a quarter of the node's memory,
			 * or enough for this handle, whichever is larger. */
			size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
			size_t handle_size = handle->ops->get_size(handle);
			if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
				reclaim = starpu_memstrategy_data_size_coefficient*handle_size;

			_STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
			if (is_prefetch)
			{
				/* For a mere prefetch, only drop chunks from the
				 * allocation cache; do not evict live data. */
				flush_memchunk_cache(dst_node, reclaim);
			}
			else
				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
		}
	}
	while((allocated_memory == -ENOMEM) && attempts++ < 2);

	/* Re-acquire the header lock, making datawizard progress while it is
	 * contended so ongoing transfers can complete (avoids deadlock). */
	int cpt = 0;
	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
	{
		cpt++;
		_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
	}
	if (cpt == STARPU_SPIN_MAXTRY)
		_starpu_spin_lock(&handle->header_lock);

	/* Drop the temporary reference taken above. */
	replicate->refcnt--;
	STARPU_ASSERT(replicate->refcnt >= 0);
	STARPU_ASSERT(handle->busy_count > 0);
	handle->busy_count--;
	ret = _starpu_data_check_not_busy(handle);
	STARPU_ASSERT(ret == 0);

	if (replicate->allocated)
	{
		/* Argl, somebody allocated it in between already, drop this one */
		_STARPU_TRACE_START_FREE(dst_node, data_size);
		handle->ops->free_data_on_node(data_interface, dst_node);
		_STARPU_TRACE_END_FREE(dst_node);
		allocated_memory = 0;
	}
	else
		/* Install allocated interface */
		memcpy(replicate->data_interface, data_interface, handle->ops->interface_size);

	return allocated_memory;
}
  829. int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
  830. {
  831. starpu_ssize_t allocated_memory;
  832. unsigned dst_node = replicate->memory_node;
  833. STARPU_ASSERT(handle);
  834. /* A buffer is already allocated on the node */
  835. if (replicate->allocated)
  836. return 0;
  837. STARPU_ASSERT(replicate->data_interface);
  838. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);
  839. /* perhaps we could really not handle that capacity misses */
  840. if (allocated_memory == -ENOMEM)
  841. return -ENOMEM;
  842. register_mem_chunk(replicate, 1);
  843. replicate->allocated = 1;
  844. replicate->automatically_allocated = 1;
  845. if (replicate->relaxed_coherency == 0 && dst_node == STARPU_MAIN_RAM)
  846. {
  847. /* We are allocating the buffer in main memory, also register it
  848. * for the gcc plugin. */
  849. void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
  850. if (ptr != NULL)
  851. {
  852. _starpu_data_register_ram_pointer(handle, ptr);
  853. }
  854. }
  855. return 0;
  856. }
  857. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
  858. {
  859. return handle->per_node[memory_node].allocated;
  860. }
  861. /* This memchunk has been recently used, put it last on the mc_list, so we will
  862. * try to evict it as late as possible */
  863. void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
  864. {
  865. if (!mc)
  866. /* user-allocated memory */
  867. return;
  868. _starpu_spin_lock(&mc_lock[node]);
  869. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  870. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  871. _starpu_spin_unlock(&mc_lock[node]);
  872. }
  873. #ifdef STARPU_MEMORY_STATS
  874. void _starpu_memory_display_stats_by_node(int node)
  875. {
  876. _starpu_spin_lock(&mc_lock[node]);
  877. if (!_starpu_mem_chunk_list_empty(mc_list[node]))
  878. {
  879. struct _starpu_mem_chunk *mc;
  880. fprintf(stderr, "#-------\n");
  881. fprintf(stderr, "Data on Node #%d\n",node);
  882. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  883. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  884. mc = _starpu_mem_chunk_list_next(mc))
  885. {
  886. if (mc->automatically_allocated == 0)
  887. _starpu_memory_display_handle_stats(mc->data);
  888. }
  889. }
  890. _starpu_spin_unlock(&mc_lock[node]);
  891. }
  892. #endif
  893. void starpu_data_display_memory_stats(void)
  894. {
  895. #ifdef STARPU_MEMORY_STATS
  896. unsigned node;
  897. fprintf(stderr, "\n#---------------------\n");
  898. fprintf(stderr, "Memory stats :\n");
  899. for (node = 0; node < STARPU_MAXNODES; node++)
  900. {
  901. _starpu_memory_display_stats_by_node(node);
  902. }
  903. fprintf(stderr, "\n#---------------------\n");
  904. #endif
  905. }
  906. static int
  907. get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
  908. {
  909. int target = -1;
  910. unsigned nnodes = starpu_memory_nodes_get_count();
  911. unsigned int i;
  912. double time_disk = 0;
  913. for (i = 0; i < nnodes; i++)
  914. {
  915. if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
  916. (_starpu_memory_manager_test_allocate_size(i, _starpu_data_get_size(handle)) == 1 ||
  917. handle->per_node[i].allocated))
  918. {
  919. /* if we can write on the disk */
  920. if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
  921. {
  922. /* only time can change between disk <-> main_ram
  923. * and not between main_ram <-> worker if we compare diks*/
  924. double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
  925. if (target == -1 || time_disk > time_tmp)
  926. {
  927. target = i;
  928. time_disk = time_tmp;
  929. }
  930. }
  931. }
  932. }
  933. return target;
  934. }
  935. static unsigned
  936. choose_target(starpu_data_handle_t handle, unsigned node)
  937. {
  938. int target = -1;
  939. size_t size_handle = _starpu_data_get_size(handle);
  940. if (handle->home_node != -1)
  941. /* try to push on RAM if we can before to push on disk */
  942. if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
  943. {
  944. if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  945. _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
  946. {
  947. target = STARPU_MAIN_RAM;
  948. }
  949. else
  950. {
  951. target = get_better_disk_can_accept_size(handle, node);
  952. }
  953. }
  954. /* others memory nodes */
  955. else
  956. {
  957. target = handle->home_node;
  958. }
  959. else
  960. {
  961. /* handle->home_node == -1 */
  962. /* no place for datas in RAM, we push on disk */
  963. if (node == STARPU_MAIN_RAM)
  964. {
  965. target = get_better_disk_can_accept_size(handle, node);
  966. }
  967. /* node != 0 */
  968. /* try to push data to RAM if we can before to push on disk*/
  969. else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  970. _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
  971. {
  972. target = STARPU_MAIN_RAM;
  973. }
  974. /* no place in RAM */
  975. else
  976. {
  977. target = get_better_disk_can_accept_size(handle, node);
  978. }
  979. }
  980. /* we haven't the right to write on the disk */
  981. if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
  982. target = -1;
  983. return target;
  984. }