/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2009-2013  Université de Bordeaux 1
 * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <datawizard/memory_manager.h>
#include <datawizard/memalloc.h>
#include <datawizard/footprint.h>
#include <core/disk.h>
#include <starpu.h>

/* These per-node RW-locks protect the mc_list and memchunk_cache entries */
/* Note: the handle header lock is always taken before this one */
static starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];

/* Potentially in-use memory chunks */
static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];

/* Explicitly cached memory chunks that can be reused */
static struct _starpu_mem_chunk_list *memchunk_cache[STARPU_MAXNODES];

/* When reclaiming memory to allocate, we reclaim
 * MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
const unsigned starpu_memstrategy_data_size_coefficient = 2;

static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
static int choose_target(starpu_data_handle_t handle, unsigned node);

void _starpu_init_mem_chunk_lists(void)
{
	unsigned i;
	for (i = 0; i < STARPU_MAXNODES; i++)
	{
		STARPU_PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
		mc_list[i] = _starpu_mem_chunk_list_new();
		memchunk_cache[i] = _starpu_mem_chunk_list_new();
	}
}

void _starpu_deinit_mem_chunk_lists(void)
{
	unsigned i;
	for (i = 0; i < STARPU_MAXNODES; i++)
	{
		_starpu_mem_chunk_list_delete(mc_list[i]);
		_starpu_mem_chunk_list_delete(memchunk_cache[i]);
		STARPU_PTHREAD_RWLOCK_DESTROY(&mc_rwlock[i]);
	}
}

/*
 *	Manipulate subtrees
 */

static void unlock_all_subtree(starpu_data_handle_t handle)
{
	/* unlock all sub-subtrees children
	 * Note that this is done in the reverse order of
	 * lock_all_subtree so that we avoid deadlocks */
	unsigned i;
	for (i = 0; i < handle->nchildren; i++)
	{
		unsigned child = handle->nchildren - 1 - i;
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		unlock_all_subtree(child_handle);
	}

	_starpu_spin_unlock(&handle->header_lock);
}

static int lock_all_subtree(starpu_data_handle_t handle)
{
	int child;

	/* lock parent */
	if (_starpu_spin_trylock(&handle->header_lock))
		/* the handle is busy, abort */
		return 0;

	/* lock all sub-subtrees children */
	for (child = 0; child < (int) handle->nchildren; child++)
	{
		if (!lock_all_subtree(starpu_data_get_child(handle, child)))
		{
			/* Some child is busy, abort */
			while (--child >= 0)
				/* Unlock what we have already uselessly locked */
				unlock_all_subtree(starpu_data_get_child(handle, child));
			return 0;
		}
	}

	return 1;
}
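
/* A sketch of the intended usage (illustration only): locks are taken
 * parent-first, children in increasing order, and released in the exact
 * reverse order by unlock_all_subtree(). Because only trylock is used and
 * the acquisition is all-or-nothing, two threads racing on overlapping
 * subtrees cannot deadlock: one of them fails a trylock and releases
 * everything it had already acquired.
 *
 *	if (lock_all_subtree(handle))
 *	{
 *		... inspect or modify handle and all its children ...
 *		unlock_all_subtree(handle);
 *	}
 *	else
 *	{
 *		... the tree is busy, back off and retry later ...
 *	}
 */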

static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
{
	/* we only free if no one refers to the leaf */
	uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
	if (refcnt)
		return 0;

	if (!handle->nchildren)
		return 1;

	/* look into all sub-subtrees children */
	unsigned child;
	for (child = 0; child < handle->nchildren; child++)
	{
		unsigned res;
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		res = may_free_subtree(child_handle, node);
		if (!res)
			return 0;
	}

	/* no problem was found */
	return 1;
}

static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
				     unsigned dst_node)
{
	unsigned i;
	unsigned last = 0;
	unsigned cnt;
	int ret;

	STARPU_ASSERT(dst_node != src_node);

	if (handle->nchildren == 0)
	{
		struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
		struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];

		/* this is a leaf */
		switch(src_replicate->state)
		{
		case STARPU_OWNER:
			/* the local node has the only copy */
			/* the owner is now the destination node */
			src_replicate->state = STARPU_INVALID;
			dst_replicate->state = STARPU_OWNER;

#ifdef STARPU_DEVEL
#warning we should use requests during memory reclaim
#endif
			/* TODO use request !! */
			/* Take temporary references on the replicates */
			_starpu_spin_checklocked(&handle->header_lock);
			src_replicate->refcnt++;
			dst_replicate->refcnt++;
			handle->busy_count += 2;

			ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
			STARPU_ASSERT(ret == 0);

			src_replicate->refcnt--;
			dst_replicate->refcnt--;
			STARPU_ASSERT(handle->busy_count >= 2);
			handle->busy_count -= 2;
			ret = _starpu_data_check_not_busy(handle);
			STARPU_ASSERT(ret == 0);

			break;
		case STARPU_SHARED:
			/* some other node may have the copy */
			src_replicate->state = STARPU_INVALID;

			/* count the number of copies */
			cnt = 0;
			for (i = 0; i < STARPU_MAXNODES; i++)
			{
				if (handle->per_node[i].state == STARPU_SHARED)
				{
					cnt++;
					last = i;
				}
			}
			STARPU_ASSERT(cnt > 0);

			if (cnt == 1)
				handle->per_node[last].state = STARPU_OWNER;

			break;
		case STARPU_INVALID:
			/* nothing to be done */
			break;
		default:
			STARPU_ABORT();
			break;
		}
	}
	else
	{
		/* transfer all sub-subtrees children */
		unsigned child;
		for (child = 0; child < handle->nchildren; child++)
		{
			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
			transfer_subtree_to_node(child_handle, src_node, dst_node);
		}
	}
}
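
/* Summary of the leaf state transitions performed above (informal sketch
 * of the MSI-like protocol used here):
 *
 *	OWNER at src	-> copy src to dst, src becomes INVALID, dst becomes OWNER
 *	SHARED at src	-> src becomes INVALID; if a single SHARED copy
 *			   remains anywhere, it is promoted to OWNER (no
 *			   transfer is needed, valid copies already exist)
 *	INVALID at src	-> nothing to do
 */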

static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
{
	unsigned child;

	replicate->allocated = 0;

	/* XXX why do we need that ? */
	replicate->automatically_allocated = 0;

	for (child = 0; child < handle->nchildren; child++)
	{
		/* Notify children that their buffer has been deallocated too */
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		notify_handle_children(child_handle, &child_handle->per_node[node], node);
	}
}

static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	STARPU_ASSERT(mc->ops);
	STARPU_ASSERT(mc->ops->free_data_on_node);

	starpu_data_handle_t handle = mc->data;
	struct _starpu_data_replicate *replicate = mc->replicate;

	if (handle)
		_starpu_spin_checklocked(&handle->header_lock);

	if (mc->automatically_allocated &&
		(!handle || replicate->refcnt == 0))
	{
		if (handle)
			STARPU_ASSERT(replicate->allocated);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interfaces, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the free method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
		}
#endif

		mc->ops->free_data_on_node(mc->chunk_interface, node);

		if (handle)
			notify_handle_children(handle, replicate, node);

		freed = mc->size;

		if (handle)
			STARPU_ASSERT(replicate->refcnt == 0);
	}

	return freed;
}

static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t size;
	starpu_data_handle_t handle = mc->data;

	if (handle)
	{
		_starpu_spin_checklocked(&handle->header_lock);
		mc->size = _starpu_data_get_size(handle);
	}

	mc->replicate->mc = NULL;

	/* free the actual buffer */
	size = free_memory_on_node(mc, node);

	/* remove the mem_chunk from the list */
	_starpu_mem_chunk_list_erase(mc_list[node], mc);

	free(mc->chunk_interface);
	_starpu_mem_chunk_delete(mc);

	return size;
}

/* This function is called for memory chunks that are possibly in use (i.e. not
 * in the cache). They should therefore still be associated to a handle. */
static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	starpu_data_handle_t handle;
	handle = mc->data;
	STARPU_ASSERT(handle);

	/* This data should be written through to this node, avoid dropping it! */
	if (handle->wt_mask & (1<<node))
		return 0;

	/* This data was registered from this node, we will not be able to drop it anyway */
	if ((int) node == handle->home_node)
		return 0;

	/* REDUX memchunk */
	if (mc->relaxed_coherency == 2)
	{
		/* TODO: reduce it back to e.g. main memory */
	}
	/* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
	 * memchunk that could be used with filters. */
	else if (mc->relaxed_coherency == 1)
	{
		STARPU_ASSERT(mc->replicate);

		if (_starpu_spin_trylock(&handle->header_lock))
			/* Handle is busy, abort */
			return 0;

		if (mc->replicate->refcnt == 0)
		{
			/* Note that there is no need to transfer any data or
			 * to update the status in terms of MSI protocol
			 * because this memchunk is associated to a replicate
			 * in "relaxed coherency" mode. */
			freed = do_free_mem_chunk(mc, node);
		}

		_starpu_spin_unlock(&handle->header_lock);
	}
	/* try to lock all the subtree */
	else if (lock_all_subtree(handle))
	{
		/* check if they are all "free" */
		if (may_free_subtree(handle, node))
		{
			int target = -1;

			/* XXX Considering only owner to invalidate */
			STARPU_ASSERT(handle->per_node[node].refcnt == 0);

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */

			/* choose the best target */
			target = choose_target(handle, node);

			if (target != -1)
			{
#ifdef STARPU_MEMORY_STATS
				if (handle->per_node[node].state == STARPU_OWNER)
					_starpu_memory_handle_stats_invalidated(handle, node);
#endif
				transfer_subtree_to_node(handle, node, target);
#ifdef STARPU_MEMORY_STATS
				_starpu_memory_handle_stats_loaded_owner(handle, target);
#endif
				STARPU_ASSERT(handle->per_node[node].refcnt == 0);

				/* now the actual buffer may be freed */
				freed = do_free_mem_chunk(mc, node);
			}
		}

		/* unlock the tree */
		unlock_all_subtree(handle);
	}
	return freed;
}
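
/* Note on the three cases above: REDUX replicates (relaxed_coherency == 2)
 * are currently left alone; SCRATCH replicates (relaxed_coherency == 1) can
 * be dropped without any transfer since they carry no coherent data; and
 * ordinary replicates must first be written back to the target chosen by
 * choose_target() before their buffer may be freed. */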

#ifdef STARPU_USE_ALLOCATION_CACHE
/* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
 * that the mc is already in the list of buffers that are possibly used, and
 * therefore not in the cache. */
static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
{
	/* we found an appropriate mem chunk: so we get it out
	 * of the "to free" list, and reassign it to the new
	 * piece of data */

	if (!is_already_in_mc_list)
	{
		_starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
	}

	struct _starpu_data_replicate *old_replicate = mc->replicate;
	old_replicate->allocated = 0;
	old_replicate->automatically_allocated = 0;
	old_replicate->initialized = 0;

	new_replicate->allocated = 1;
	new_replicate->automatically_allocated = 1;
	new_replicate->initialized = 0;

	STARPU_ASSERT(new_replicate->data_interface);
	STARPU_ASSERT(mc->chunk_interface);
	memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);

	mc->data = new_replicate->handle;
	/* mc->ops, mc->footprint and mc->interface should be
	 * unchanged ! */

	/* reinsert the mem chunk in the list of active memory chunks */
	if (!is_already_in_mc_list)
	{
		_starpu_mem_chunk_list_push_back(mc_list[node], mc);
	}
}

static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
{
	unsigned success = 0;

	starpu_data_handle_t old_data;
	old_data = mc->data;
	STARPU_ASSERT(old_data);

	/* try to lock all the subtree */
	/* and check if they are all "free" */
	if (lock_all_subtree(old_data))
	{
		if (may_free_subtree(old_data, node))
		{
			success = 1;

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */
			transfer_subtree_to_node(old_data, node, 0);

			/* now replace the previous data */
			reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
		}

		/* unlock the tree */
		unlock_all_subtree(old_data);
	}

	return success;
}

static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
					  void *data_interface_b, struct starpu_data_interface_ops *ops_b)
{
	if (ops_a->interfaceid != ops_b->interfaceid)
		return -1;

	int ret = ops_a->compare(data_interface_a, data_interface_b);

	return ret;
}

/* This function must be called with mc_rwlock[node] taken in write mode */
static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle)
{
	uint32_t footprint = _starpu_compute_data_footprint(handle);

	/* go through all buffers in the cache */
	struct _starpu_mem_chunk *mc;
	for (mc = _starpu_mem_chunk_list_begin(memchunk_cache[node]);
	     mc != _starpu_mem_chunk_list_end(memchunk_cache[node]);
	     mc = _starpu_mem_chunk_list_next(mc))
	{
		if (mc->footprint == footprint)
		{
			/* Is that a false hit ? (this is _very_ unlikely) */
			if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops))
				continue;

			/* Cache hit */

			/* Remove from the cache */
			_starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
			return mc;
		}
	}

	/* This is a cache miss */
	return NULL;
}

/* this function looks for a memory chunk that matches a given footprint in the
 * list of mem chunks that need to be freed. This function must be called with
 * mc_rwlock[node] taken in write mode. */
static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
{
	struct _starpu_mem_chunk *mc, *next_mc;

	/* go through all buffers in the cache */
	mc = _starpu_memchunk_cache_lookup_locked(node, data);
	if (mc)
	{
		/* We found an entry in the cache so we can reuse it */
		reuse_mem_chunk(node, replicate, mc, 0);
		return 1;
	}

	/* now look for some non essential data in the active list */
	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* there is a risk that the memory chunk is freed before next
		 * iteration starts: so we compute the next element of the list
		 * now */
		next_mc = _starpu_mem_chunk_list_next(mc);

		if (mc->data->is_not_important && (mc->footprint == footprint))
		{
			// fprintf(stderr, "found a candidate ...\n");
			if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
				return 1;
		}
	}

	return 0;
}
#endif

/*
 * Free the memory chunks that are explicitly tagged to be freed. Note that
 * this function takes the mc_rwlock[node] rw-lock itself, so it must not
 * already be held by the caller.
 */
static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
{
	struct _starpu_mem_chunk *mc;
	struct _starpu_mem_chunk_list *busy_memchunk_cache;

	size_t freed = 0;

	if (_starpu_mem_chunk_list_empty(memchunk_cache[node]))
		return 0;

	busy_memchunk_cache = _starpu_mem_chunk_list_new();

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
	while (!_starpu_mem_chunk_list_empty(memchunk_cache[node]))
	{
		mc = _starpu_mem_chunk_list_pop_front(memchunk_cache[node]);
		starpu_data_handle_t handle = mc->data;

		if (handle && _starpu_spin_trylock(&handle->header_lock))
		{
			/* The handle is still busy, leave this chunk for later */
			_starpu_mem_chunk_list_push_front(busy_memchunk_cache, mc);
			continue;
		}

		freed += free_memory_on_node(mc, node);
		if (handle)
			_starpu_spin_unlock(&handle->header_lock);

		free(mc->chunk_interface);
		_starpu_mem_chunk_delete(mc);
		if (reclaim && freed >= reclaim)
			break;
	}

	_starpu_mem_chunk_list_push_list_front(busy_memchunk_cache, memchunk_cache[node]);
	_starpu_mem_chunk_list_delete(busy_memchunk_cache);

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
	return freed;
}
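
/* Implementation note: chunks whose handle is still busy are parked on the
 * temporary busy_memchunk_cache list and spliced back to the front of
 * memchunk_cache[node] once the scan is done, so a busy handle only
 * postpones, and never prevents, the freeing of its cached buffer. */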

/*
 * Try to free the buffers currently in use on the memory node. If the force
 * flag is set, the memory is freed regardless of coherency concerns (this
 * should only be used at the termination of StarPU for instance). This
 * function takes the mc_rwlock[node] rw-lock itself, so it must not already
 * be held by the caller.
 */
static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
{
	size_t freed = 0;

	struct _starpu_mem_chunk *mc, *next_mc;

	/*
	 * We have to unlock mc_rwlock before locking header_lock, so we have
	 * to be careful with the list. We try to do just one pass, by
	 * remembering the next mc to be tried. If it gets dropped, we restart
	 * from zero. So we continue until we go through the whole list without
	 * finding anything to free.
	 */

restart:
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* mc hopefully gets out of the list, we thus need to prefetch
		 * the next element */
		next_mc = _starpu_mem_chunk_list_next(mc);

		if (!force)
		{
			freed += try_to_free_mem_chunk(mc, node);

			if (reclaim && freed >= reclaim)
				break;
		}
		else
		{
			starpu_data_handle_t handle = mc->data;

			if (_starpu_spin_trylock(&handle->header_lock))
			{
				/* Ergl. We are shutting down, but somebody is
				 * still locking the handle. That's not
				 * supposed to happen, but better be safe by
				 * letting it go through. */
				STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
				goto restart;
			}

			/* We must free the memory now, because we are
			 * terminating the drivers: note that data coherency is
			 * not maintained in that case ! */
			freed += do_free_mem_chunk(mc, node);
			_starpu_spin_unlock(&handle->header_lock);
		}
	}

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);

	return freed;
}

size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
{
	size_t freed = 0;

	if (reclaim && !force)
	{
		static int warned;
		if (!warned)
		{
			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out\n", node, (unsigned long) reclaim);
			warned = 1;
		}
	}

	/* remove all buffers for which there was a removal request */
	freed += flush_memchunk_cache(node, reclaim);

	/* try to free all allocated data potentially in use */
	if (force || (reclaim && freed < reclaim))
		freed += free_potentially_in_use_mc(node, force, reclaim);

	return freed;
}
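
/* A sketch of the typical call, e.g. from an allocation path that just
 * failed (illustrative only, the value of `reclaim` depends on the failed
 * allocation):
 *
 *	size_t freed = _starpu_memory_reclaim_generic(node, 0, reclaim);
 *
 * The cheap path (flushing the explicit cache) is always tried first; the
 * expensive path (evicting potentially in-use chunks) only runs when the
 * cache flush did not free enough memory, or when force is set. */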

/*
 * This function frees all the memory that was implicitly allocated by StarPU
 * (for the data replicates). This does not ensure data coherency, and should
 * only be called while StarPU is getting shut down.
 */
size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
{
	return _starpu_memory_reclaim_generic(node, 1, 0);
}

static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
{
	struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
	starpu_data_handle_t handle = replicate->handle;

	STARPU_ASSERT(handle);
	STARPU_ASSERT(handle->ops);

	mc->data = handle;
	mc->footprint = _starpu_compute_data_footprint(handle);
	mc->ops = handle->ops;
	mc->automatically_allocated = automatically_allocated;
	mc->relaxed_coherency = replicate->relaxed_coherency;
	mc->replicate = replicate;
	mc->replicate->mc = mc;

	/* Save a copy of the interface */
	mc->chunk_interface = malloc(interface_size);
	STARPU_ASSERT(mc->chunk_interface);
	memcpy(mc->chunk_interface, replicate->data_interface, interface_size);

	return mc;
}

static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
{
	unsigned dst_node = replicate->memory_node;

	struct _starpu_mem_chunk *mc;

	/* the interface was already filled by ops->allocate_data_on_node */
	size_t interface_size = replicate->handle->ops->interface_size;

	/* Put this memchunk in the list of memchunks in use */
	mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
	_starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
}

/* This function is called when the handle is destroyed (e.g. when calling
 * unregister or unpartition). It puts the memchunk that refers to the
 * specified replicate into the memchunk cache, so that its buffer may be
 * reused later (main-memory chunks are freed immediately instead, see below).
 */
void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
{
	struct _starpu_mem_chunk *mc = replicate->mc;

	STARPU_ASSERT(mc->data == handle);

	/* Record the allocated size, so that later in memory
	 * reclaiming we can estimate how much memory we free
	 * by freeing this. */
	mc->size = size;

	/* This memchunk doesn't have to do with the data any more. */
	replicate->mc = NULL;
	replicate->allocated = 0;
	replicate->automatically_allocated = 0;

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	mc->data = NULL;
	/* remove it from the main list */
	_starpu_mem_chunk_list_erase(mc_list[node], mc);

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);

	/* The RAM node's cache would only ever be flushed when memory gets
	 * tight, i.e. when StarPU automatically knows the total memory size
	 * of the machine, or when the user has provided a limitation.
	 *
	 * We don't want the cache to be eating a lot of memory just for
	 * cached allocations. Allocating main memory is cheap anyway. */
	/* This is particularly important when
	 * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
	 * wouldn't even re-use these allocations! */
	if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
	{
		/* Free data immediately */
		free_memory_on_node(mc, node);

		free(mc->chunk_interface);
		_starpu_mem_chunk_delete(mc);
	}
	else
	{
		/* put it in the list of buffers to be removed */
		STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
		_starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
		STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
	}
}

/*
 * In order to allocate a piece of data, we try to reuse existing buffers if
 * possible.
 * 1 - we try to reuse a memchunk that is explicitly unused.
 * 2 - we go through the list of memory chunks and find one that is not
 * referenced and that has the same footprint, and reuse it.
 * 3 - we call the usual driver's alloc method
 * 4 - we go through the list of memory chunks and release those that are
 * not referenced (or part of those).
 */
static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
{
	unsigned attempts = 0;
	starpu_ssize_t allocated_memory;
	int ret;

	_starpu_spin_checklocked(&handle->header_lock);

	_starpu_data_allocation_inc_stats(dst_node);

#ifdef STARPU_USE_ALLOCATION_CACHE
	/* perhaps we can directly reuse a buffer in the free-list */
	uint32_t footprint = _starpu_compute_data_footprint(handle);

	_STARPU_TRACE_START_ALLOC_REUSE(dst_node);
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
	if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
	{
		STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
		_starpu_allocation_cache_hit(dst_node);
		starpu_ssize_t data_size = _starpu_data_get_size(handle);
		return data_size;
	}
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
	_STARPU_TRACE_END_ALLOC_REUSE(dst_node);
#endif

	do
	{
		STARPU_ASSERT(handle->ops);
		STARPU_ASSERT(handle->ops->allocate_data_on_node);

		_STARPU_TRACE_START_ALLOC(dst_node);
		STARPU_ASSERT(replicate->data_interface);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interfaces, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the malloc method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
		}
#endif

		allocated_memory = handle->ops->allocate_data_on_node(replicate->data_interface, dst_node);
		_STARPU_TRACE_END_ALLOC(dst_node);

		if (allocated_memory == -ENOMEM)
		{
			size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
			size_t handle_size = handle->ops->get_size(handle);
			if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
				reclaim = starpu_memstrategy_data_size_coefficient*handle_size;

			/* Take a temporary reference on the replicate */
			replicate->refcnt++;
			handle->busy_count++;
			_starpu_spin_unlock(&handle->header_lock);

			_STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
			if (is_prefetch)
			{
				flush_memchunk_cache(dst_node, reclaim);
			}
			else
				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);

			int cpt = 0;
			while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
			{
				cpt++;
				_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
			}
			if (cpt == STARPU_SPIN_MAXTRY)
				_starpu_spin_lock(&handle->header_lock);

			replicate->refcnt--;
			STARPU_ASSERT(replicate->refcnt >= 0);
			STARPU_ASSERT(handle->busy_count > 0);
			handle->busy_count--;
			ret = _starpu_data_check_not_busy(handle);
			STARPU_ASSERT(ret == 0);
		}
	}
	while((allocated_memory == -ENOMEM) && attempts++ < 2);

	return allocated_memory;
}
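
/* Note on the retry loop above: on -ENOMEM we try to reclaim at least a
 * quarter of the node's memory, or starpu_memstrategy_data_size_coefficient
 * times the data size if that is larger, and then retry the driver
 * allocation, up to two more times. Prefetch allocations only flush the
 * explicit cache and never evict potentially in-use data. The temporary
 * refcnt/busy_count references keep the handle alive while header_lock is
 * released during the reclaim. */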

int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
{
	starpu_ssize_t allocated_memory;

	unsigned dst_node = replicate->memory_node;

	STARPU_ASSERT(handle);

	/* A buffer is already allocated on the node */
	if (replicate->allocated)
		return 0;

	STARPU_ASSERT(replicate->data_interface);
	allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);

	/* perhaps we really could not handle that capacity miss */
	if (allocated_memory == -ENOMEM)
		return -ENOMEM;

	register_mem_chunk(replicate, 1);

	replicate->allocated = 1;
	replicate->automatically_allocated = 1;

	if (dst_node == STARPU_MAIN_RAM)
	{
		void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
		if (ptr != NULL)
		{
			_starpu_data_register_ram_pointer(handle, ptr);
		}
	}

	return 0;
}

unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
{
	return handle->per_node[memory_node].allocated;
}

/* This memchunk has been recently used, put it last on the mc_list, so we will
 * try to evict it as late as possible */
void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
{
	if (!mc)
		/* user-allocated memory */
		return;

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
	_starpu_mem_chunk_list_erase(mc_list[node], mc);
	_starpu_mem_chunk_list_push_back(mc_list[node], mc);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
}
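
/* mc_list[node] is thus kept in least-recently-used order: eviction in
 * free_potentially_in_use_mc() scans from the front (the oldest chunks),
 * while freshly used chunks are moved back to the tail. */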

#ifdef STARPU_MEMORY_STATS
void _starpu_memory_display_stats_by_node(int node)
{
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	if (!_starpu_mem_chunk_list_empty(mc_list[node]))
	{
		struct _starpu_mem_chunk *mc;

		fprintf(stderr, "#-------\n");
		fprintf(stderr, "Data on Node #%d\n", node);

		for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
		     mc != _starpu_mem_chunk_list_end(mc_list[node]);
		     mc = _starpu_mem_chunk_list_next(mc))
		{
			if (mc->automatically_allocated == 0)
				_starpu_memory_display_handle_stats(mc->data);
		}
	}

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
}
#endif

void starpu_data_display_memory_stats(void)
{
#ifdef STARPU_MEMORY_STATS
	unsigned node;

	fprintf(stderr, "\n#---------------------\n");
	fprintf(stderr, "Memory stats:\n");
	for (node = 0; node < STARPU_MAXNODES; node++)
	{
		_starpu_memory_display_stats_by_node(node);
	}
	fprintf(stderr, "\n#---------------------\n");
#endif
}

static int
get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
{
	int target = -1;
	unsigned nnodes = starpu_memory_nodes_get_count();
	unsigned int i;
	double time_disk = 0;

	for (i = 0; i < nnodes; i++)
	{
		if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
		    (_starpu_memory_manager_test_allocate_size_(_starpu_data_get_size(handle), i) == 1 ||
		     handle->per_node[i].allocated))
		{
			/* only consider disks we are allowed to write on */
			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
			{
				/* when comparing disks, only the disk <-> main_ram
				 * transfer time matters: the main_ram <-> worker
				 * time is the same for all of them */
				double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
				if (target == -1 || time_disk > time_tmp)
				{
					target = i;
					time_disk = time_tmp;
				}
			}
		}
	}
	return target;
}

static int
choose_target(starpu_data_handle_t handle, unsigned node)
{
	int target = -1;
	size_t size_handle = _starpu_data_get_size(handle);
	if (handle->home_node != -1)
	{
		/* try to push to RAM if we can before pushing to disk */
		if (starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
		{
			if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			    _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
			{
				target = STARPU_MAIN_RAM;
			}
			else
			{
				target = get_better_disk_can_accept_size(handle, node);
			}
		}
		/* other memory nodes */
		else
		{
			target = handle->home_node;
		}
	}
	else
	{
		/* handle->home_node == -1 */
		/* no place for the data in RAM, we push to disk */
		if (node == STARPU_MAIN_RAM)
		{
			target = get_better_disk_can_accept_size(handle, node);
		}
		/* node != STARPU_MAIN_RAM */
		/* try to push the data to RAM if we can before pushing to disk */
		else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			 _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
		{
			target = STARPU_MAIN_RAM;
		}
		/* no place in RAM */
		else
		{
			target = get_better_disk_can_accept_size(handle, node);
		}
	}
	/* we are not allowed to write on the disk */
	if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
		target = -1;
	return target;
}
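
/* Informal summary of the decision tree implemented by choose_target():
 *
 *	home_node is a disk and node != STARPU_MAIN_RAM:
 *		-> main RAM if allocated there or allocatable, else best disk
 *	home_node is any other valid node:
 *		-> home_node
 *	home_node == -1:
 *		node == STARPU_MAIN_RAM -> best disk
 *		otherwise               -> main RAM if possible, else best disk
 *
 * In every case, a disk target whose STARPU_DISK_NO_RECLAIM flag forbids
 * writing is discarded, and -1 (no target) is returned instead. */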