memalloc.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2014 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <datawizard/memory_manager.h>
  18. #include <datawizard/memalloc.h>
  19. #include <datawizard/footprint.h>
  20. #include <core/disk.h>
  21. #include <starpu.h>
  22. #include <common/uthash.h>
  23. /* This per-node RW-locks protect mc_list and memchunk_cache entries */
  24. /* Note: handle header lock is always taken before this */
  25. static struct _starpu_spinlock mc_lock[STARPU_MAXNODES];
  26. /* Potentially in use memory chunks */
  27. static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];
  28. /* Explicitly caches memory chunks that can be reused */
  29. struct mc_cache_entry
  30. {
  31. UT_hash_handle hh;
  32. struct _starpu_mem_chunk_list *list;
  33. uint32_t footprint;
  34. };
  35. static struct mc_cache_entry *mc_cache[STARPU_MAXNODES];
  36. /* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
  37. const unsigned starpu_memstrategy_data_size_coefficient=2;
  38. static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
  39. static unsigned choose_target(starpu_data_handle_t handle, unsigned node);
  40. void _starpu_init_mem_chunk_lists(void)
  41. {
  42. unsigned i;
  43. for (i = 0; i < STARPU_MAXNODES; i++)
  44. {
  45. _starpu_spin_init(&mc_lock[i]);
  46. mc_list[i] = _starpu_mem_chunk_list_new();
  47. }
  48. }
  49. void _starpu_deinit_mem_chunk_lists(void)
  50. {
  51. unsigned i;
  52. for (i = 0; i < STARPU_MAXNODES; i++)
  53. {
  54. struct mc_cache_entry *entry, *tmp;
  55. _starpu_mem_chunk_list_delete(mc_list[i]);
  56. HASH_ITER(hh, mc_cache[i], entry, tmp)
  57. {
  58. HASH_DEL(mc_cache[i], entry);
  59. _starpu_mem_chunk_list_delete(entry->list);
  60. free(entry);
  61. }
  62. _starpu_spin_destroy(&mc_lock[i]);
  63. }
  64. }
  65. /*
  66. * Manipulate subtrees
  67. */
  68. static void unlock_all_subtree(starpu_data_handle_t handle)
  69. {
  70. /* lock all sub-subtrees children
  71. * Note that this is done in the reverse order of the
  72. * lock_all_subtree so that we avoid deadlock */
  73. unsigned i;
  74. for (i =0; i < handle->nchildren; i++)
  75. {
  76. unsigned child = handle->nchildren - 1 - i;
  77. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  78. unlock_all_subtree(child_handle);
  79. }
  80. _starpu_spin_unlock(&handle->header_lock);
  81. }
  82. static int lock_all_subtree(starpu_data_handle_t handle)
  83. {
  84. int child;
  85. /* lock parent */
  86. if (_starpu_spin_trylock(&handle->header_lock))
  87. /* the handle is busy, abort */
  88. return 0;
  89. /* lock all sub-subtrees children */
  90. for (child = 0; child < (int) handle->nchildren; child++)
  91. {
  92. if (!lock_all_subtree(starpu_data_get_child(handle, child))) {
  93. /* Some child is busy, abort */
  94. while (--child >= 0)
  95. /* Unlock what we have already uselessly locked */
  96. unlock_all_subtree(starpu_data_get_child(handle, child));
  97. return 0;
  98. }
  99. }
  100. return 1;
  101. }
  102. static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
  103. {
  104. /* we only free if no one refers to the leaf */
  105. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  106. if (refcnt)
  107. return 0;
  108. if (!handle->nchildren)
  109. return 1;
  110. /* look into all sub-subtrees children */
  111. unsigned child;
  112. for (child = 0; child < handle->nchildren; child++)
  113. {
  114. unsigned res;
  115. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  116. res = may_free_subtree(child_handle, node);
  117. if (!res) return 0;
  118. }
  119. /* no problem was found */
  120. return 1;
  121. }
  122. static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
  123. unsigned dst_node)
  124. {
  125. unsigned i;
  126. unsigned last = 0;
  127. unsigned cnt;
  128. int ret;
  129. STARPU_ASSERT(dst_node != src_node);
  130. if (handle->nchildren == 0)
  131. {
  132. struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
  133. struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
  134. /* this is a leaf */
  135. switch(src_replicate->state)
  136. {
  137. case STARPU_OWNER:
  138. /* the local node has the only copy */
  139. /* the owner is now the destination_node */
  140. src_replicate->state = STARPU_INVALID;
  141. dst_replicate->state = STARPU_OWNER;
  142. #ifdef STARPU_DEVEL
  143. #warning we should use requests during memory reclaim
  144. #endif
  145. /* TODO use request !! */
  146. /* Take temporary references on the replicates */
  147. _starpu_spin_checklocked(&handle->header_lock);
  148. src_replicate->refcnt++;
  149. dst_replicate->refcnt++;
  150. handle->busy_count+=2;
  151. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  152. STARPU_ASSERT(ret == 0);
  153. src_replicate->refcnt--;
  154. dst_replicate->refcnt--;
  155. STARPU_ASSERT(handle->busy_count >= 2);
  156. handle->busy_count -= 2;
  157. ret = _starpu_data_check_not_busy(handle);
  158. STARPU_ASSERT(ret == 0);
  159. break;
  160. case STARPU_SHARED:
  161. /* some other node may have the copy */
  162. src_replicate->state = STARPU_INVALID;
  163. /* count the number of copies */
  164. cnt = 0;
  165. for (i = 0; i < STARPU_MAXNODES; i++)
  166. {
  167. if (handle->per_node[i].state == STARPU_SHARED)
  168. {
  169. cnt++;
  170. last = i;
  171. }
  172. }
  173. STARPU_ASSERT(cnt > 0);
  174. if (cnt == 1)
  175. handle->per_node[last].state = STARPU_OWNER;
  176. break;
  177. case STARPU_INVALID:
  178. /* nothing to be done */
  179. break;
  180. default:
  181. STARPU_ABORT();
  182. break;
  183. }
  184. }
  185. else
  186. {
  187. /* lock all sub-subtrees children */
  188. unsigned child;
  189. for (child = 0; child < handle->nchildren; child++)
  190. {
  191. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  192. transfer_subtree_to_node(child_handle, src_node, dst_node);
  193. }
  194. }
  195. }
  196. static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
  197. {
  198. unsigned child;
  199. replicate->allocated = 0;
  200. /* XXX why do we need that ? */
  201. replicate->automatically_allocated = 0;
  202. for (child = 0; child < handle->nchildren; child++)
  203. {
  204. /* Notify children that their buffer has been deallocated too */
  205. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  206. notify_handle_children(child_handle, &child_handle->per_node[node], node);
  207. }
  208. }
  209. static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
  210. {
  211. size_t freed = 0;
  212. STARPU_ASSERT(mc->ops);
  213. STARPU_ASSERT(mc->ops->free_data_on_node);
  214. starpu_data_handle_t handle = mc->data;
  215. struct _starpu_data_replicate *replicate = mc->replicate;
  216. if (handle)
  217. _starpu_spin_checklocked(&handle->header_lock);
  218. if (mc->automatically_allocated &&
  219. (!handle || replicate->refcnt == 0))
  220. {
  221. void *data_interface;
  222. if (handle)
  223. STARPU_ASSERT(replicate->allocated);
  224. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  225. if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
  226. {
  227. /* To facilitate the design of interface, we set the
  228. * proper CUDA device in case it is needed. This avoids
  229. * having to set it again in the free method of each
  230. * interface. */
  231. starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
  232. }
  233. #endif
  234. if (handle)
  235. data_interface = replicate->data_interface;
  236. else
  237. data_interface = mc->chunk_interface;
  238. STARPU_ASSERT(data_interface);
  239. _STARPU_TRACE_START_FREE(node, mc->size);
  240. mc->ops->free_data_on_node(data_interface, node);
  241. _STARPU_TRACE_END_FREE(node);
  242. if (handle)
  243. notify_handle_children(handle, replicate, node);
  244. freed = mc->size;
  245. if (handle)
  246. STARPU_ASSERT(replicate->refcnt == 0);
  247. }
  248. return freed;
  249. }
  250. static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  251. {
  252. size_t size;
  253. starpu_data_handle_t handle = mc->data;
  254. if (handle) {
  255. _starpu_spin_checklocked(&handle->header_lock);
  256. mc->size = _starpu_data_get_size(handle);
  257. }
  258. if (mc->replicate)
  259. mc->replicate->mc=NULL;
  260. /* free the actual buffer */
  261. size = free_memory_on_node(mc, node);
  262. /* remove the mem_chunk from the list */
  263. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  264. _starpu_mem_chunk_delete(mc);
  265. return size;
  266. }
  267. /* This function is called for memory chunks that are possibly in used (ie. not
  268. * in the cache). They should therefore still be associated to a handle. */
  269. static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  270. {
  271. size_t freed = 0;
  272. starpu_data_handle_t handle;
  273. handle = mc->data;
  274. STARPU_ASSERT(handle);
  275. /* This data should be written through to this node, avoid dropping it! */
  276. if (handle->wt_mask & (1<<node))
  277. return 0;
  278. /* This data was registered from this node, we will not be able to drop it anyway */
  279. if ((int) node == handle->home_node)
  280. return 0;
  281. /* REDUX memchunk */
  282. if (mc->relaxed_coherency == 2)
  283. {
  284. /* TODO: reduce it back to e.g. main memory */
  285. }
  286. else
  287. /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
  288. * memchunk that could be used with filters. */
  289. if (mc->relaxed_coherency == 1)
  290. {
  291. STARPU_ASSERT(mc->replicate);
  292. if (_starpu_spin_trylock(&handle->header_lock))
  293. /* Handle is busy, abort */
  294. return 0;
  295. if (mc->replicate->refcnt == 0)
  296. {
  297. /* Note that there is no need to transfer any data or
  298. * to update the status in terms of MSI protocol
  299. * because this memchunk is associated to a replicate
  300. * in "relaxed coherency" mode. */
  301. freed = do_free_mem_chunk(mc, node);
  302. }
  303. _starpu_spin_unlock(&handle->header_lock);
  304. }
  305. /* try to lock all the subtree */
  306. else if (lock_all_subtree(handle))
  307. {
  308. /* check if they are all "free" */
  309. if (may_free_subtree(handle, node))
  310. {
  311. int target = -1;
  312. /* XXX Considering only owner to invalidate */
  313. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  314. /* in case there was nobody using that buffer, throw it
  315. * away after writing it back to main memory */
  316. /* choose the best target */
  317. target = choose_target(handle, node);
  318. if (target != -1) {
  319. #ifdef STARPU_MEMORY_STATS
  320. if (handle->per_node[node].state == STARPU_OWNER)
  321. _starpu_memory_handle_stats_invalidated(handle, node);
  322. #endif
  323. _STARPU_TRACE_START_WRITEBACK(node);
  324. transfer_subtree_to_node(handle, node, target);
  325. _STARPU_TRACE_END_WRITEBACK(node);
  326. #ifdef STARPU_MEMORY_STATS
  327. _starpu_memory_handle_stats_loaded_owner(handle, target);
  328. #endif
  329. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  330. /* now the actual buffer may be freed */
  331. freed = do_free_mem_chunk(mc, node);
  332. }
  333. }
  334. /* unlock the tree */
  335. unlock_all_subtree(handle);
  336. }
  337. return freed;
  338. }
  339. #ifdef STARPU_USE_ALLOCATION_CACHE
  340. /* We assume that mc_lock[node] is taken. is_already_in_mc_list indicates
  341. * that the mc is already in the list of buffers that are possibly used, and
  342. * therefore not in the cache. */
  343. static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
  344. {
  345. void *data_interface;
  346. /* we found an appropriate mem chunk: so we get it out
  347. * of the "to free" list, and reassign it to the new
  348. * piece of data */
  349. struct _starpu_data_replicate *old_replicate = mc->replicate;
  350. if (old_replicate)
  351. {
  352. old_replicate->allocated = 0;
  353. old_replicate->automatically_allocated = 0;
  354. old_replicate->initialized = 0;
  355. data_interface = old_replicate->data_interface;
  356. }
  357. else
  358. data_interface = mc->chunk_interface;
  359. new_replicate->allocated = 1;
  360. new_replicate->automatically_allocated = 1;
  361. new_replicate->initialized = 0;
  362. STARPU_ASSERT(new_replicate->data_interface);
  363. STARPU_ASSERT(data_interface);
  364. memcpy(new_replicate->data_interface, data_interface, mc->size_interface);
  365. if (!old_replicate)
  366. {
  367. free(mc->chunk_interface);
  368. mc->chunk_interface = NULL;
  369. }
  370. mc->data = new_replicate->handle;
  371. /* mc->ops, mc->footprint and mc->interface should be
  372. * unchanged ! */
  373. /* reinsert the mem chunk in the list of active memory chunks */
  374. if (!is_already_in_mc_list)
  375. {
  376. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  377. }
  378. }
  379. static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
  380. {
  381. unsigned success = 0;
  382. starpu_data_handle_t old_data;
  383. old_data = mc->data;
  384. STARPU_ASSERT(old_data);
  385. /* try to lock all the subtree */
  386. /* and check if they are all "free" */
  387. if (lock_all_subtree(old_data))
  388. {
  389. if (may_free_subtree(old_data, node))
  390. {
  391. success = 1;
  392. /* in case there was nobody using that buffer, throw it
  393. * away after writing it back to main memory */
  394. _STARPU_TRACE_START_WRITEBACK(node);
  395. transfer_subtree_to_node(old_data, node, 0);
  396. _STARPU_TRACE_END_WRITEBACK(node);
  397. /* now replace the previous data */
  398. reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
  399. }
  400. /* unlock the tree */
  401. unlock_all_subtree(old_data);
  402. }
  403. return success;
  404. }
  405. static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
  406. void *data_interface_b, struct starpu_data_interface_ops *ops_b)
  407. {
  408. if (ops_a->interfaceid != ops_b->interfaceid)
  409. return -1;
  410. int ret = ops_a->compare(data_interface_a, data_interface_b);
  411. return ret;
  412. }
  413. /* This function must be called with mc_lock[node] taken */
  414. static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
  415. {
  416. /* go through all buffers in the cache */
  417. struct mc_cache_entry *entry;
  418. HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
  419. if (!entry)
  420. /* No data with that footprint */
  421. return NULL;
  422. struct _starpu_mem_chunk *mc;
  423. for (mc = _starpu_mem_chunk_list_begin(entry->list);
  424. mc != _starpu_mem_chunk_list_end(entry->list);
  425. mc = _starpu_mem_chunk_list_next(mc))
  426. {
  427. /* Is that a false hit ? (this is _very_ unlikely) */
  428. if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops))
  429. continue;
  430. /* Cache hit */
  431. /* Remove from the cache */
  432. _starpu_mem_chunk_list_erase(entry->list, mc);
  433. return mc;
  434. }
  435. /* This is a cache miss */
  436. return NULL;
  437. }
  438. /* this function looks for a memory chunk that matches a given footprint in the
  439. * list of mem chunk that need to be freed. This function must be called with
  440. * mc_lock[node] taken. */
  441. static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
  442. {
  443. struct _starpu_mem_chunk *mc, *next_mc;
  444. /* go through all buffers in the cache */
  445. mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint);
  446. if (mc)
  447. {
  448. /* We found an entry in the cache so we can reuse it */
  449. reuse_mem_chunk(node, replicate, mc, 0);
  450. return 1;
  451. }
  452. if (!_starpu_has_not_important_data)
  453. return 0;
  454. /* now look for some non essential data in the active list */
  455. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  456. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  457. mc = next_mc)
  458. {
  459. /* there is a risk that the memory chunk is freed before next
  460. * iteration starts: so we compute the next element of the list
  461. * now */
  462. next_mc = _starpu_mem_chunk_list_next(mc);
  463. if (mc->data->is_not_important && (mc->footprint == footprint))
  464. {
  465. // fprintf(stderr, "found a candidate ...\n");
  466. if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
  467. return 1;
  468. }
  469. }
  470. return 0;
  471. }
  472. #endif
  473. /*
  474. * Free the memory chuncks that are explicitely tagged to be freed. The
  475. * mc_lock[node] rw-lock should be taken prior to calling this function.
  476. */
  477. static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
  478. {
  479. struct _starpu_mem_chunk *mc;
  480. struct _starpu_mem_chunk_list *busy_mc_cache;
  481. struct mc_cache_entry *entry, *tmp;
  482. size_t freed = 0;
  483. _starpu_spin_lock(&mc_lock[node]);
  484. HASH_ITER(hh, mc_cache[node], entry, tmp)
  485. {
  486. busy_mc_cache = _starpu_mem_chunk_list_new();
  487. while (!_starpu_mem_chunk_list_empty(entry->list)) {
  488. mc = _starpu_mem_chunk_list_pop_front(entry->list);
  489. starpu_data_handle_t handle = mc->data;
  490. if (handle)
  491. if (_starpu_spin_trylock(&handle->header_lock)) {
  492. /* The handle is still busy, leave this chunk for later */
  493. _starpu_mem_chunk_list_push_back(busy_mc_cache, mc);
  494. continue;
  495. }
  496. freed += free_memory_on_node(mc, node);
  497. if (handle)
  498. _starpu_spin_unlock(&handle->header_lock);
  499. free(mc->chunk_interface);
  500. _starpu_mem_chunk_delete(mc);
  501. if (reclaim && freed >= reclaim)
  502. break;
  503. }
  504. _starpu_mem_chunk_list_push_list_front(busy_mc_cache, entry->list);
  505. _starpu_mem_chunk_list_delete(busy_mc_cache);
  506. if (reclaim && freed >= reclaim)
  507. break;
  508. }
  509. _starpu_spin_unlock(&mc_lock[node]);
  510. return freed;
  511. }
  512. /*
  513. * Try to free the buffers currently in use on the memory node. If the force
  514. * flag is set, the memory is freed regardless of coherency concerns (this
  515. * should only be used at the termination of StarPU for instance). The
  516. * mc_lock[node] should be taken prior to calling this function.
  517. */
  518. static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
  519. {
  520. size_t freed = 0;
  521. struct _starpu_mem_chunk *mc, *next_mc;
  522. /*
  523. * We have to unlock mc_lock before locking header_lock, so we have
  524. * to be careful with the list. We try to do just one pass, by
  525. * remembering the next mc to be tried. If it gets dropped, we restart
  526. * from zero. So we continue until we go through the whole list without
  527. * finding anything to free.
  528. */
  529. restart:
  530. _starpu_spin_lock(&mc_lock[node]);
  531. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  532. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  533. mc = next_mc)
  534. {
  535. /* mc hopefully gets out of the list, we thus need to prefetch
  536. * the next element */
  537. next_mc = _starpu_mem_chunk_list_next(mc);
  538. if (!force)
  539. {
  540. freed += try_to_free_mem_chunk(mc, node);
  541. if (reclaim && freed >= reclaim)
  542. break;
  543. }
  544. else
  545. {
  546. starpu_data_handle_t handle = mc->data;
  547. if (_starpu_spin_trylock(&handle->header_lock))
  548. {
  549. /* Ergl. We are shutting down, but somebody is
  550. * still locking the handle. That's not
  551. * supposed to happen, but better be safe by
  552. * letting it go through. */
  553. _starpu_spin_unlock(&mc_lock[node]);
  554. goto restart;
  555. }
  556. /* We must free the memory now, because we are
  557. * terminating the drivers: note that data coherency is
  558. * not maintained in that case ! */
  559. freed += do_free_mem_chunk(mc, node);
  560. _starpu_spin_unlock(&handle->header_lock);
  561. }
  562. }
  563. _starpu_spin_unlock(&mc_lock[node]);
  564. return freed;
  565. }
  /* Reclaim memory on `node`.
   *
   * node:    memory node to purge.
   * force:   when set, in-use buffers are freed regardless of coherency
   *          concerns (shutdown path).
   * reclaim: number of bytes we would like to get back; 0 means no target.
   *
   * The cache of chunks flagged for removal is always flushed first. The
   * buffers that are potentially in use are then examined either when `force`
   * is set, or when a target was given and the flush did not reach it. Without
   * the `force` test, a forced call with no target (reclaim == 0) would never
   * touch the in-use buffers at all.
   *
   * Returns the number of bytes freed. */
  size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
  {
      size_t freed = 0;

      if (reclaim && !force)
      {
          /* only warn the user once */
          static int warned;
          char name[32];
          _starpu_memory_node_get_name(node, name, sizeof(name));
          if (!warned)
          {
              _STARPU_DISP("Not enough memory left on node %s. Your application working set is probably simply just hard to fit in the devices, but StarPU will cope with it by trying to purge %lu bytes out. This message will not be printed again for further purges\n", name, (unsigned long) reclaim);
              warned = 1;
          }
      }

      /* remove all buffers for which there was a removal request */
      freed += flush_memchunk_cache(node, reclaim);

      /* try to free all allocated data potentially in use */
      if (force || (reclaim && freed<reclaim))
          freed += free_potentially_in_use_mc(node, force, reclaim);

      return freed;
  }
  /*
   * This function frees all the memory that was implicitly allocated by StarPU
   * (for the data replicates) on `node`, by running a forced reclaim with no
   * size target. This is not ensuring data coherency, and should only be
   * called while StarPU is getting shut down.
   *
   * Returns the number of bytes freed.
   */
  size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
  {
      const unsigned force = 1;
      const size_t no_target = 0;

      return _starpu_memory_reclaim_generic(node, force, no_target);
  }
  595. static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
  596. {
  597. struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
  598. starpu_data_handle_t handle = replicate->handle;
  599. STARPU_ASSERT(handle);
  600. STARPU_ASSERT(handle->ops);
  601. mc->data = handle;
  602. mc->footprint = _starpu_compute_data_footprint(handle);
  603. mc->ops = handle->ops;
  604. mc->automatically_allocated = automatically_allocated;
  605. mc->relaxed_coherency = replicate->relaxed_coherency;
  606. mc->replicate = replicate;
  607. mc->replicate->mc = mc;
  608. mc->chunk_interface = NULL;
  609. mc->size_interface = interface_size;
  610. return mc;
  611. }
  612. static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
  613. {
  614. unsigned dst_node = replicate->memory_node;
  615. struct _starpu_mem_chunk *mc;
  616. /* the interface was already filled by ops->allocate_data_on_node */
  617. size_t interface_size = replicate->handle->ops->interface_size;
  618. /* Put this memchunk in the list of memchunk in use */
  619. mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);
  620. _starpu_spin_lock(&mc_lock[dst_node]);
  621. _starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
  622. _starpu_spin_unlock(&mc_lock[dst_node]);
  623. }
  624. /* This function is called when the handle is destroyed (eg. when calling
  625. * unregister or unpartition). It puts all the memchunks that refer to the
  626. * specified handle into the cache.
  627. */
  628. void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
  629. {
  630. struct _starpu_mem_chunk *mc = replicate->mc;
  631. STARPU_ASSERT(mc->data == handle);
  632. /* Record the allocated size, so that later in memory
  633. * reclaiming we can estimate how much memory we free
  634. * by freeing this. */
  635. mc->size = size;
  636. /* Also keep the interface parameters and pointers, for later reuse
  637. * while detached, or freed */
  638. mc->chunk_interface = malloc(mc->size_interface);
  639. memcpy(mc->chunk_interface, replicate->data_interface, mc->size_interface);
  640. /* This memchunk doesn't have to do with the data any more. */
  641. replicate->mc = NULL;
  642. mc->replicate = NULL;
  643. replicate->allocated = 0;
  644. replicate->automatically_allocated = 0;
  645. replicate->initialized = 0;
  646. _starpu_spin_lock(&mc_lock[node]);
  647. mc->data = NULL;
  648. /* remove it from the main list */
  649. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  650. _starpu_spin_unlock(&mc_lock[node]);
  651. /* We would only flush the RAM nodes cache if memory gets tight, either
  652. * because StarPU automatically knows the total memory size of the
  653. * machine, or because the user has provided a limitation.
  654. *
  655. * We don't really want the former scenario to be eating a lot of
  656. * memory just for caching allocations. Allocating main memory is cheap
  657. * anyway.
  658. */
  659. /* This is particularly important when
  660. * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
  661. * wouldn't even re-use these allocations! */
  662. if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
  663. {
  664. /* Free data immediately */
  665. free_memory_on_node(mc, node);
  666. free(mc->chunk_interface);
  667. _starpu_mem_chunk_delete(mc);
  668. }
  669. else
  670. {
  671. /* put it in the list of buffers to be removed */
  672. uint32_t footprint = mc->footprint;
  673. struct mc_cache_entry *entry;
  674. _starpu_spin_lock(&mc_lock[node]);
  675. HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
  676. if (!entry) {
  677. entry = malloc(sizeof(*entry));
  678. entry->list = _starpu_mem_chunk_list_new();
  679. entry->footprint = footprint;
  680. HASH_ADD(hh, mc_cache[node], footprint, sizeof(entry->footprint), entry);
  681. }
  682. _starpu_mem_chunk_list_push_front(entry->list, mc);
  683. _starpu_spin_unlock(&mc_lock[node]);
  684. }
  685. }
  686. /*
  687. * In order to allocate a piece of data, we try to reuse existing buffers if
  688. * its possible.
  689. * 1 - we try to reuse a memchunk that is explicitely unused.
  690. * 2 - we go through the list of memory chunks and find one that is not
  691. * referenced and that has the same footprint to reuse it.
  692. * 3 - we call the usual driver's alloc method
  693. * 4 - we go through the list of memory chunks and release those that are
  694. * not referenced (or part of those).
  695. *
  696. */
  697. static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
  698. {
  699. unsigned attempts = 0;
  700. starpu_ssize_t allocated_memory;
  701. int ret;
  702. starpu_ssize_t data_size = _starpu_data_get_size(handle);
  703. _starpu_spin_checklocked(&handle->header_lock);
  704. _starpu_data_allocation_inc_stats(dst_node);
  705. #ifdef STARPU_USE_ALLOCATION_CACHE
  706. /* perhaps we can directly reuse a buffer in the free-list */
  707. uint32_t footprint = _starpu_compute_data_footprint(handle);
  708. _STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size);
  709. _starpu_spin_lock(&mc_lock[dst_node]);
  710. if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
  711. {
  712. _starpu_spin_unlock(&mc_lock[dst_node]);
  713. _starpu_allocation_cache_hit(dst_node);
  714. return data_size;
  715. }
  716. _starpu_spin_unlock(&mc_lock[dst_node]);
  717. _STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  718. #endif
  719. STARPU_ASSERT(handle->ops);
  720. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  721. STARPU_ASSERT(replicate->data_interface);
  722. char data_interface[handle->ops->interface_size];
  723. memcpy(data_interface, replicate->data_interface, handle->ops->interface_size);
  724. /* Take temporary reference on the replicate */
  725. replicate->refcnt++;
  726. handle->busy_count++;
  727. _starpu_spin_unlock(&handle->header_lock);
  728. do
  729. {
  730. _STARPU_TRACE_START_ALLOC(dst_node, data_size);
  731. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  732. if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
  733. {
  734. /* To facilitate the design of interface, we set the
  735. * proper CUDA device in case it is needed. This avoids
  736. * having to set it again in the malloc method of each
  737. * interface. */
  738. starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
  739. }
  740. #endif
  741. allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node);
  742. _STARPU_TRACE_END_ALLOC(dst_node);
  743. if (allocated_memory == -ENOMEM)
  744. {
  745. size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
  746. size_t handle_size = handle->ops->get_size(handle);
  747. if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
  748. reclaim = starpu_memstrategy_data_size_coefficient*handle_size;
  749. _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
  750. if (is_prefetch)
  751. {
  752. flush_memchunk_cache(dst_node, reclaim);
  753. }
  754. else
  755. _starpu_memory_reclaim_generic(dst_node, 0, reclaim);
  756. _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
  757. }
  758. }
  759. while((allocated_memory == -ENOMEM) && attempts++ < 2);
  760. int cpt = 0;
  761. while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
  762. {
  763. cpt++;
  764. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  765. }
  766. if (cpt == STARPU_SPIN_MAXTRY)
  767. _starpu_spin_lock(&handle->header_lock);
  768. replicate->refcnt--;
  769. STARPU_ASSERT(replicate->refcnt >= 0);
  770. STARPU_ASSERT(handle->busy_count > 0);
  771. handle->busy_count--;
  772. ret = _starpu_data_check_not_busy(handle);
  773. STARPU_ASSERT(ret == 0);
  774. if (replicate->allocated)
  775. {
  776. /* Argl, somebody allocated it in between already, drop this one */
  777. _STARPU_TRACE_START_FREE(dst_node, data_size);
  778. handle->ops->free_data_on_node(data_interface, dst_node);
  779. _STARPU_TRACE_END_FREE(dst_node);
  780. allocated_memory = 0;
  781. }
  782. else
  783. /* Install allocated interface */
  784. memcpy(replicate->data_interface, data_interface, handle->ops->interface_size);
  785. return allocated_memory;
  786. }
  787. int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
  788. {
  789. starpu_ssize_t allocated_memory;
  790. unsigned dst_node = replicate->memory_node;
  791. STARPU_ASSERT(handle);
  792. /* A buffer is already allocated on the node */
  793. if (replicate->allocated)
  794. return 0;
  795. STARPU_ASSERT(replicate->data_interface);
  796. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);
  797. /* perhaps we could really not handle that capacity misses */
  798. if (allocated_memory == -ENOMEM)
  799. return -ENOMEM;
  800. register_mem_chunk(replicate, 1);
  801. replicate->allocated = 1;
  802. replicate->automatically_allocated = 1;
  803. if (replicate->relaxed_coherency == 0 && dst_node == STARPU_MAIN_RAM)
  804. {
  805. /* We are allocating the buffer in main memory, also register it
  806. * for the gcc plugin. */
  807. void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
  808. if (ptr != NULL)
  809. {
  810. _starpu_data_register_ram_pointer(handle, ptr);
  811. }
  812. }
  813. return 0;
  814. }
  815. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
  816. {
  817. return handle->per_node[memory_node].allocated;
  818. }
  819. /* This memchunk has been recently used, put it last on the mc_list, so we will
  820. * try to evict it as late as possible */
  821. void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
  822. {
  823. if (!mc)
  824. /* user-allocated memory */
  825. return;
  826. _starpu_spin_lock(&mc_lock[node]);
  827. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  828. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  829. _starpu_spin_unlock(&mc_lock[node]);
  830. }
  #ifdef STARPU_MEMORY_STATS
  /*
   * Print on stderr the handle statistics of the memory chunks of `node`
   * whose `automatically_allocated` flag is 0. Nothing is printed when the
   * chunk list of the node is empty. The list is walked under mc_lock[node].
   */
  void _starpu_memory_display_stats_by_node(int node)
  {
      _starpu_spin_lock(&mc_lock[node]);

      if (!_starpu_mem_chunk_list_empty(mc_list[node]))
      {
          struct _starpu_mem_chunk *chunk = _starpu_mem_chunk_list_begin(mc_list[node]);

          fprintf(stderr, "#-------\n");
          fprintf(stderr, "Data on Node #%d\n",node);

          while (chunk != _starpu_mem_chunk_list_end(mc_list[node]))
          {
              if (chunk->automatically_allocated == 0)
              {
                  _starpu_memory_display_handle_stats(chunk->data);
              }
              chunk = _starpu_mem_chunk_list_next(chunk);
          }
      }

      _starpu_spin_unlock(&mc_lock[node]);
  }
  #endif
  /*
   * Print on stderr the memory statistics of every memory node index below
   * STARPU_MAXNODES, framed by a header and a footer line.
   * This is a no-op unless STARPU_MEMORY_STATS is defined.
   */
  void starpu_data_display_memory_stats(void)
  {
  #ifdef STARPU_MEMORY_STATS
      unsigned node = 0;

      fprintf(stderr, "\n#---------------------\n");
      fprintf(stderr, "Memory stats :\n");

      while (node < STARPU_MAXNODES)
      {
          _starpu_memory_display_stats_by_node(node);
          node++;
      }

      fprintf(stderr, "\n#---------------------\n");
  #endif
  }
  864. static int
  865. get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
  866. {
  867. int target = -1;
  868. unsigned nnodes = starpu_memory_nodes_get_count();
  869. unsigned int i;
  870. double time_disk = 0;
  871. for (i = 0; i < nnodes; i++)
  872. {
  873. if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
  874. (_starpu_memory_manager_test_allocate_size_(_starpu_data_get_size(handle), i) == 1 ||
  875. handle->per_node[i].allocated))
  876. {
  877. /* if we can write on the disk */
  878. if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
  879. {
  880. /* only time can change between disk <-> main_ram
  881. * and not between main_ram <-> worker if we compare diks*/
  882. double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
  883. if (target == -1 || time_disk > time_tmp)
  884. {
  885. target = i;
  886. time_disk = time_tmp;
  887. }
  888. }
  889. }
  890. }
  891. return target;
  892. }
  893. static unsigned
  894. choose_target(starpu_data_handle_t handle, unsigned node)
  895. {
  896. int target = -1;
  897. size_t size_handle = _starpu_data_get_size(handle);
  898. if (handle->home_node != -1)
  899. /* try to push on RAM if we can before to push on disk */
  900. if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
  901. {
  902. if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  903. _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
  904. {
  905. target = STARPU_MAIN_RAM;
  906. }
  907. else
  908. {
  909. target = get_better_disk_can_accept_size(handle, node);
  910. }
  911. }
  912. /* others memory nodes */
  913. else
  914. {
  915. target = handle->home_node;
  916. }
  917. else
  918. {
  919. /* handle->home_node == -1 */
  920. /* no place for datas in RAM, we push on disk */
  921. if (node == STARPU_MAIN_RAM)
  922. {
  923. target = get_better_disk_can_accept_size(handle, node);
  924. }
  925. /* node != 0 */
  926. /* try to push data to RAM if we can before to push on disk*/
  927. else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  928. _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
  929. {
  930. target = STARPU_MAIN_RAM;
  931. }
  932. /* no place in RAM */
  933. else
  934. {
  935. target = get_better_disk_can_accept_size(handle, node);
  936. }
  937. }
  938. /* we haven't the right to write on the disk */
  939. if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
  940. target = -1;
  941. return target;
  942. }