memalloc.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <datawizard/memory_manager.h>
  18. #include <datawizard/memalloc.h>
  19. #include <datawizard/footprint.h>
  20. #include <core/disk.h>
  21. #include <starpu.h>
  22. /* This per-node spinlock protect lru_list */
  23. static struct _starpu_spinlock lru_rwlock[STARPU_MAXNODES];
  24. /* Last Recently used memory chunkgs */
  25. static struct _starpu_mem_chunk_lru_list *starpu_lru_list[STARPU_MAXNODES];
  26. /* This per-node RW-locks protect mc_list and memchunk_cache entries */
  27. /* Note: handle header lock is always taken before this */
  28. static starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
  29. /* Potentially in use memory chunks */
  30. static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];
  31. /* Explicitly caches memory chunks that can be reused */
  32. static struct _starpu_mem_chunk_list *memchunk_cache[STARPU_MAXNODES];
  33. /* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
  34. const unsigned starpu_memstrategy_data_size_coefficient=2;
  35. static void starpu_lru(unsigned node);
  36. static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
  37. static unsigned choose_target(starpu_data_handle_t handle, unsigned node);
  38. void _starpu_init_mem_chunk_lists(void)
  39. {
  40. unsigned i;
  41. for (i = 0; i < STARPU_MAXNODES; i++)
  42. {
  43. STARPU_PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
  44. _starpu_spin_init(&lru_rwlock[i]);
  45. mc_list[i] = _starpu_mem_chunk_list_new();
  46. starpu_lru_list[i] = _starpu_mem_chunk_lru_list_new();
  47. memchunk_cache[i] = _starpu_mem_chunk_list_new();
  48. }
  49. }
  50. void _starpu_deinit_mem_chunk_lists(void)
  51. {
  52. unsigned i;
  53. for (i = 0; i < STARPU_MAXNODES; i++)
  54. {
  55. _starpu_mem_chunk_list_delete(mc_list[i]);
  56. _starpu_mem_chunk_list_delete(memchunk_cache[i]);
  57. _starpu_mem_chunk_lru_list_delete(starpu_lru_list[i]);
  58. _starpu_spin_destroy(&lru_rwlock[i]);
  59. STARPU_PTHREAD_RWLOCK_DESTROY(&mc_rwlock[i]);
  60. }
  61. }
  62. /*
  63. * Manipulate subtrees
  64. */
  65. static void unlock_all_subtree(starpu_data_handle_t handle)
  66. {
  67. /* lock all sub-subtrees children
  68. * Note that this is done in the reverse order of the
  69. * lock_all_subtree so that we avoid deadlock */
  70. unsigned i;
  71. for (i =0; i < handle->nchildren; i++)
  72. {
  73. unsigned child = handle->nchildren - 1 - i;
  74. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  75. unlock_all_subtree(child_handle);
  76. }
  77. _starpu_spin_unlock(&handle->header_lock);
  78. }
  79. static int lock_all_subtree(starpu_data_handle_t handle)
  80. {
  81. int child;
  82. /* lock parent */
  83. if (_starpu_spin_trylock(&handle->header_lock))
  84. /* the handle is busy, abort */
  85. return 0;
  86. /* lock all sub-subtrees children */
  87. for (child = 0; child < (int) handle->nchildren; child++)
  88. {
  89. if (!lock_all_subtree(starpu_data_get_child(handle, child))) {
  90. /* Some child is busy, abort */
  91. while (--child >= 0)
  92. /* Unlock what we have already uselessly locked */
  93. unlock_all_subtree(starpu_data_get_child(handle, child));
  94. return 0;
  95. }
  96. }
  97. return 1;
  98. }
  99. static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
  100. {
  101. /* we only free if no one refers to the leaf */
  102. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  103. if (refcnt)
  104. return 0;
  105. if (!handle->nchildren)
  106. return 1;
  107. /* look into all sub-subtrees children */
  108. unsigned child;
  109. for (child = 0; child < handle->nchildren; child++)
  110. {
  111. unsigned res;
  112. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  113. res = may_free_subtree(child_handle, node);
  114. if (!res) return 0;
  115. }
  116. /* no problem was found */
  117. return 1;
  118. }
  119. static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
  120. unsigned dst_node)
  121. {
  122. unsigned i;
  123. unsigned last = 0;
  124. unsigned cnt;
  125. int ret;
  126. STARPU_ASSERT(dst_node != src_node);
  127. if (handle->nchildren == 0)
  128. {
  129. struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
  130. struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
  131. /* this is a leaf */
  132. switch(src_replicate->state)
  133. {
  134. case STARPU_OWNER:
  135. /* the local node has the only copy */
  136. /* the owner is now the destination_node */
  137. src_replicate->state = STARPU_INVALID;
  138. dst_replicate->state = STARPU_OWNER;
  139. #ifdef STARPU_DEVEL
  140. #warning we should use requests during memory reclaim
  141. #endif
  142. /* TODO use request !! */
  143. /* Take temporary references on the replicates */
  144. _starpu_spin_checklocked(&handle->header_lock);
  145. src_replicate->refcnt++;
  146. dst_replicate->refcnt++;
  147. handle->busy_count+=2;
  148. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  149. STARPU_ASSERT(ret == 0);
  150. src_replicate->refcnt--;
  151. dst_replicate->refcnt--;
  152. STARPU_ASSERT(handle->busy_count >= 2);
  153. handle->busy_count -= 2;
  154. ret = _starpu_data_check_not_busy(handle);
  155. STARPU_ASSERT(ret == 0);
  156. break;
  157. case STARPU_SHARED:
  158. /* some other node may have the copy */
  159. src_replicate->state = STARPU_INVALID;
  160. /* count the number of copies */
  161. cnt = 0;
  162. for (i = 0; i < STARPU_MAXNODES; i++)
  163. {
  164. if (handle->per_node[i].state == STARPU_SHARED)
  165. {
  166. cnt++;
  167. last = i;
  168. }
  169. }
  170. STARPU_ASSERT(cnt > 0);
  171. if (cnt == 1)
  172. handle->per_node[last].state = STARPU_OWNER;
  173. break;
  174. case STARPU_INVALID:
  175. /* nothing to be done */
  176. break;
  177. default:
  178. STARPU_ABORT();
  179. break;
  180. }
  181. }
  182. else
  183. {
  184. /* lock all sub-subtrees children */
  185. unsigned child;
  186. for (child = 0; child < handle->nchildren; child++)
  187. {
  188. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  189. transfer_subtree_to_node(child_handle, src_node, dst_node);
  190. }
  191. }
  192. }
  193. static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
  194. {
  195. unsigned child;
  196. replicate->allocated = 0;
  197. /* XXX why do we need that ? */
  198. replicate->automatically_allocated = 0;
  199. for (child = 0; child < handle->nchildren; child++)
  200. {
  201. /* Notify children that their buffer has been deallocated too */
  202. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  203. notify_handle_children(child_handle, &child_handle->per_node[node], node);
  204. }
  205. }
  206. static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
  207. {
  208. size_t freed = 0;
  209. STARPU_ASSERT(mc->ops);
  210. STARPU_ASSERT(mc->ops->free_data_on_node);
  211. starpu_data_handle_t handle = mc->data;
  212. struct _starpu_data_replicate *replicate = mc->replicate;
  213. if (handle)
  214. _starpu_spin_checklocked(&handle->header_lock);
  215. if (mc->automatically_allocated &&
  216. (!handle || replicate->refcnt == 0))
  217. {
  218. if (handle)
  219. STARPU_ASSERT(replicate->allocated);
  220. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  221. if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
  222. {
  223. /* To facilitate the design of interface, we set the
  224. * proper CUDA device in case it is needed. This avoids
  225. * having to set it again in the free method of each
  226. * interface. */
  227. starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
  228. }
  229. #endif
  230. mc->ops->free_data_on_node(mc->chunk_interface, node);
  231. if (handle)
  232. notify_handle_children(handle, replicate, node);
  233. freed = mc->size;
  234. if (handle)
  235. STARPU_ASSERT(replicate->refcnt == 0);
  236. }
  237. return freed;
  238. }
  239. static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  240. {
  241. size_t size;
  242. starpu_data_handle_t handle = mc->data;
  243. if (handle) {
  244. _starpu_spin_checklocked(&handle->header_lock);
  245. mc->size = _starpu_data_get_size(handle);
  246. }
  247. mc->replicate->mc=NULL;
  248. /* free the actual buffer */
  249. size = free_memory_on_node(mc, node);
  250. /* remove the mem_chunk from the list */
  251. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  252. free(mc->chunk_interface);
  253. _starpu_mem_chunk_delete(mc);
  254. return size;
  255. }
  256. /* This function is called for memory chunks that are possibly in used (ie. not
  257. * in the cache). They should therefore still be associated to a handle. */
  258. static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  259. {
  260. size_t freed = 0;
  261. starpu_data_handle_t handle;
  262. handle = mc->data;
  263. STARPU_ASSERT(handle);
  264. /* This data should be written through to this node, avoid dropping it! */
  265. if (handle->wt_mask & (1<<node))
  266. return 0;
  267. /* This data was registered from this node, we will not be able to drop it anyway */
  268. if ((int) node == handle->home_node)
  269. return 0;
  270. /* REDUX memchunk */
  271. if (mc->relaxed_coherency == 2)
  272. {
  273. /* TODO: reduce it back to e.g. main memory */
  274. }
  275. else
  276. /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
  277. * memchunk that could be used with filters. */
  278. if (mc->relaxed_coherency == 1)
  279. {
  280. STARPU_ASSERT(mc->replicate);
  281. if (_starpu_spin_trylock(&handle->header_lock))
  282. /* Handle is busy, abort */
  283. return 0;
  284. if (mc->replicate->refcnt == 0)
  285. {
  286. /* Note taht there is no need to transfer any data or
  287. * to update the status in terms of MSI protocol
  288. * because this memchunk is associated to a replicate
  289. * in "relaxed coherency" mode. */
  290. freed = do_free_mem_chunk(mc, node);
  291. }
  292. _starpu_spin_unlock(&handle->header_lock);
  293. }
  294. /* try to lock all the subtree */
  295. else if (lock_all_subtree(handle))
  296. {
  297. /* check if they are all "free" */
  298. if (may_free_subtree(handle, node))
  299. {
  300. int target = -1;
  301. /* XXX Considering only owner to invalidate */
  302. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  303. /* in case there was nobody using that buffer, throw it
  304. * away after writing it back to main memory */
  305. /* choose the best target */
  306. target = choose_target(handle, node);
  307. if (target != -1) {
  308. #ifdef STARPU_MEMORY_STATS
  309. if (handle->per_node[node].state == STARPU_OWNER)
  310. _starpu_memory_handle_stats_invalidated(handle, node);
  311. #endif
  312. transfer_subtree_to_node(handle, node, target);
  313. #ifdef STARPU_MEMORY_STATS
  314. _starpu_memory_handle_stats_loaded_owner(handle, target);
  315. #endif
  316. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  317. /* now the actual buffer may be freed */
  318. freed = do_free_mem_chunk(mc, node);
  319. }
  320. }
  321. /* unlock the tree */
  322. unlock_all_subtree(handle);
  323. }
  324. return freed;
  325. }
  326. #ifdef STARPU_USE_ALLOCATION_CACHE
  327. /* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
  328. * that the mc is already in the list of buffers that are possibly used, and
  329. * therefore not in the cache. */
  330. static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
  331. {
  332. /* we found an appropriate mem chunk: so we get it out
  333. * of the "to free" list, and reassign it to the new
  334. * piece of data */
  335. if (!is_already_in_mc_list)
  336. {
  337. _starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  338. }
  339. struct _starpu_data_replicate *old_replicate = mc->replicate;
  340. old_replicate->allocated = 0;
  341. old_replicate->automatically_allocated = 0;
  342. old_replicate->initialized = 0;
  343. new_replicate->allocated = 1;
  344. new_replicate->automatically_allocated = 1;
  345. new_replicate->initialized = 0;
  346. STARPU_ASSERT(new_replicate->data_interface);
  347. STARPU_ASSERT(mc->chunk_interface);
  348. memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);
  349. mc->data = new_replicate->handle;
  350. /* mc->ops, mc->footprint and mc->interface should be
  351. * unchanged ! */
  352. /* reinsert the mem chunk in the list of active memory chunks */
  353. if (!is_already_in_mc_list)
  354. {
  355. _starpu_mem_chunk_list_push_front(mc_list[node], mc);
  356. }
  357. }
  358. static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
  359. {
  360. unsigned success = 0;
  361. starpu_data_handle_t old_data;
  362. old_data = mc->data;
  363. STARPU_ASSERT(old_data);
  364. /* try to lock all the subtree */
  365. /* and check if they are all "free" */
  366. if (lock_all_subtree(old_data))
  367. {
  368. if (may_free_subtree(old_data, node))
  369. {
  370. success = 1;
  371. /* in case there was nobody using that buffer, throw it
  372. * away after writing it back to main memory */
  373. transfer_subtree_to_node(old_data, node, 0);
  374. /* now replace the previous data */
  375. reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
  376. }
  377. /* unlock the tree */
  378. unlock_all_subtree(old_data);
  379. }
  380. return success;
  381. }
  382. static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
  383. void *data_interface_b, struct starpu_data_interface_ops *ops_b)
  384. {
  385. if (ops_a->interfaceid != ops_b->interfaceid)
  386. return -1;
  387. int ret = ops_a->compare(data_interface_a, data_interface_b);
  388. return ret;
  389. }
  390. /* This function must be called with mc_rwlock[node] taken in write mode */
  391. static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle)
  392. {
  393. uint32_t footprint = _starpu_compute_data_footprint(handle);
  394. /* go through all buffers in the cache */
  395. struct _starpu_mem_chunk *mc;
  396. for (mc = _starpu_mem_chunk_list_begin(memchunk_cache[node]);
  397. mc != _starpu_mem_chunk_list_end(memchunk_cache[node]);
  398. mc = _starpu_mem_chunk_list_next(mc))
  399. {
  400. if (mc->footprint == footprint)
  401. {
  402. /* Is that a false hit ? (this is _very_ unlikely) */
  403. if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops))
  404. continue;
  405. /* Cache hit */
  406. /* Remove from the cache */
  407. _starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  408. return mc;
  409. }
  410. }
  411. /* This is a cache miss */
  412. return NULL;
  413. }
  414. /* this function looks for a memory chunk that matches a given footprint in the
  415. * list of mem chunk that need to be freed. This function must be called with
  416. * mc_rwlock[node] taken in write mode. */
  417. static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
  418. {
  419. struct _starpu_mem_chunk *mc, *next_mc;
  420. /* go through all buffers in the cache */
  421. mc = _starpu_memchunk_cache_lookup_locked(node, data);
  422. if (mc)
  423. {
  424. /* We found an entry in the cache so we can reuse it */
  425. reuse_mem_chunk(node, replicate, mc, 0);
  426. return 1;
  427. }
  428. /* now look for some non essential data in the active list */
  429. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  430. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  431. mc = next_mc)
  432. {
  433. /* there is a risk that the memory chunk is freed before next
  434. * iteration starts: so we compute the next element of the list
  435. * now */
  436. next_mc = _starpu_mem_chunk_list_next(mc);
  437. if (mc->data->is_not_important && (mc->footprint == footprint))
  438. {
  439. // fprintf(stderr, "found a candidate ...\n");
  440. if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
  441. return 1;
  442. }
  443. }
  444. return 0;
  445. }
  446. #endif
  447. /*
  448. * Free the memory chuncks that are explicitely tagged to be freed. The
  449. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  450. */
  451. static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
  452. {
  453. struct _starpu_mem_chunk *mc;
  454. struct _starpu_mem_chunk_list *busy_memchunk_cache;
  455. size_t freed = 0;
  456. if (_starpu_mem_chunk_list_empty(memchunk_cache[node]))
  457. return 0;
  458. busy_memchunk_cache = _starpu_mem_chunk_list_new();
  459. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  460. while (!_starpu_mem_chunk_list_empty(memchunk_cache[node])) {
  461. mc = _starpu_mem_chunk_list_pop_front(memchunk_cache[node]);
  462. starpu_data_handle_t handle = mc->data;
  463. if (handle)
  464. if (_starpu_spin_trylock(&handle->header_lock)) {
  465. /* The handle is still busy, leave this chunk for later */
  466. _starpu_mem_chunk_list_push_front(busy_memchunk_cache, mc);
  467. continue;
  468. }
  469. freed += free_memory_on_node(mc, node);
  470. if (handle)
  471. _starpu_spin_unlock(&handle->header_lock);
  472. free(mc->chunk_interface);
  473. _starpu_mem_chunk_delete(mc);
  474. if (reclaim && freed >= reclaim)
  475. break;
  476. }
  477. _starpu_mem_chunk_list_push_list_front(busy_memchunk_cache, memchunk_cache[node]);
  478. _starpu_mem_chunk_list_delete(busy_memchunk_cache);
  479. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  480. return freed;
  481. }
  482. /*
  483. * Try to free the buffers currently in use on the memory node. If the force
  484. * flag is set, the memory is freed regardless of coherency concerns (this
  485. * should only be used at the termination of StarPU for instance). The
  486. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  487. */
  488. static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
  489. {
  490. size_t freed = 0;
  491. struct _starpu_mem_chunk *mc, *next_mc;
  492. /*
  493. * We have to unlock mc_rwlock before locking header_lock, so we have
  494. * to be careful with the list. We try to do just one pass, by
  495. * remembering the next mc to be tried. If it gets dropped, we restart
  496. * from zero. So we continue until we go through the whole list without
  497. * finding anything to free.
  498. */
  499. restart:
  500. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  501. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  502. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  503. mc = next_mc)
  504. {
  505. /* mc hopefully gets out of the list, we thus need to prefetch
  506. * the next element */
  507. next_mc = _starpu_mem_chunk_list_next(mc);
  508. if (!force)
  509. {
  510. freed += try_to_free_mem_chunk(mc, node);
  511. if (reclaim && freed >= reclaim)
  512. break;
  513. }
  514. else
  515. {
  516. starpu_data_handle_t handle = mc->data;
  517. if (_starpu_spin_trylock(&handle->header_lock))
  518. {
  519. /* Ergl. We are shutting down, but somebody is
  520. * still locking the handle. That's not
  521. * supposed to happen, but better be safe by
  522. * letting it go through. */
  523. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  524. goto restart;
  525. }
  526. /* We must free the memory now, because we are
  527. * terminating the drivers: note that data coherency is
  528. * not maintained in that case ! */
  529. freed += do_free_mem_chunk(mc, node);
  530. _starpu_spin_unlock(&handle->header_lock);
  531. }
  532. }
  533. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  534. return freed;
  535. }
  /*
   * Reclaim memory on memory node `node`.
   *
   * The cache of chunks tagged for removal is flushed first. The chunks that
   * are potentially in use are then considered:
   *  - when `force` is set (shutdown), always, whatever `reclaim` is;
   *  - otherwise only when a non-zero `reclaim` target was given and flushing
   *    the cache did not reach it.
   *
   * @param node     memory node to reclaim memory on
   * @param force    non-zero to free in-use chunks regardless of coherency
   * @param reclaim  amount to reclaim; 0 means no target
   * @return the total amount reported freed
   */
  size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
  {
      size_t freed = 0;

      starpu_lru(node);

      /* remove all buffers for which there was a removal request */
      freed += flush_memchunk_cache(node, reclaim);

      /* try to free all allocated data potentially in use.
       * The shutdown path passes force=1 with reclaim=0: testing `reclaim`
       * alone would skip the forced pass entirely. */
      if (force || (reclaim && freed < reclaim))
          freed += free_potentially_in_use_mc(node, force, reclaim);

      return freed;
  }
  /*
   * This function frees the memory that was implicitly allocated by StarPU
   * (for the data replicates) on `node`. This is not ensuring data coherency,
   * and should only be called while StarPU is getting shut down.
   *
   * It simply runs _starpu_memory_reclaim_generic() with the force flag set
   * and no reclaim target, and returns its result.
   */
  size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
  {
      const unsigned force = 1;
      const size_t no_target = 0;

      return _starpu_memory_reclaim_generic(node, force, no_target);
  }
  556. static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
  557. {
  558. struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
  559. starpu_data_handle_t handle = replicate->handle;
  560. STARPU_ASSERT(handle);
  561. STARPU_ASSERT(handle->ops);
  562. mc->data = handle;
  563. mc->footprint = _starpu_compute_data_footprint(handle);
  564. mc->ops = handle->ops;
  565. mc->automatically_allocated = automatically_allocated;
  566. mc->relaxed_coherency = replicate->relaxed_coherency;
  567. mc->replicate = replicate;
  568. mc->replicate->mc = mc;
  569. /* Save a copy of the interface */
  570. mc->chunk_interface = malloc(interface_size);
  571. STARPU_ASSERT(mc->chunk_interface);
  572. memcpy(mc->chunk_interface, replicate->data_interface, interface_size);
  573. return mc;
  574. }
  575. static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
  576. {
  577. unsigned dst_node = replicate->memory_node;
  578. struct _starpu_mem_chunk *mc;
  579. /* the interface was already filled by ops->allocate_data_on_node */
  580. size_t interface_size = replicate->handle->ops->interface_size;
  581. /* Put this memchunk in the list of memchunk in use */
  582. mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);
  583. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  584. _starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
  585. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  586. }
  587. /* This function is called when the handle is destroyed (eg. when calling
  588. * unregister or unpartition). It puts all the memchunks that refer to the
  589. * specified handle into the cache.
  590. */
  591. void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
  592. {
  593. struct _starpu_mem_chunk *mc = replicate->mc;
  594. STARPU_ASSERT(mc->data == handle);
  595. /* Record the allocated size, so that later in memory
  596. * reclaiming we can estimate how much memory we free
  597. * by freeing this. */
  598. mc->size = size;
  599. /* This memchunk doesn't have to do with the data any more. */
  600. replicate->mc = NULL;
  601. replicate->allocated = 0;
  602. replicate->automatically_allocated = 0;
  603. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  604. mc->data = NULL;
  605. /* remove it from the main list */
  606. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  607. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  608. /* We would only flush the RAM nodes cache if memory gets tight, either
  609. * because StarPU automatically knows the total memory size of the
  610. * machine, or because the user has provided a limitation.
  611. *
  612. * We don't really want the former scenario to be eating a lot of
  613. * memory just for caching allocations. Allocating main memory is cheap
  614. * anyway.
  615. */
  616. /* This is particularly important when
  617. * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
  618. * wouldn't even re-use these allocations! */
  619. if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
  620. {
  621. /* Free data immediately */
  622. free_memory_on_node(mc, node);
  623. free(mc->chunk_interface);
  624. _starpu_mem_chunk_delete(mc);
  625. }
  626. else
  627. {
  628. /* put it in the list of buffers to be removed */
  629. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  630. _starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
  631. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  632. }
  633. }
  634. /*
  635. * In order to allocate a piece of data, we try to reuse existing buffers if
  636. * its possible.
  637. * 1 - we try to reuse a memchunk that is explicitely unused.
  638. * 2 - we go through the list of memory chunks and find one that is not
  639. * referenced and that has the same footprint to reuse it.
  640. * 3 - we call the usual driver's alloc method
  641. * 4 - we go through the list of memory chunks and release those that are
  642. * not referenced (or part of those).
  643. *
  644. */
  645. static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
  646. {
  647. unsigned attempts = 0;
  648. starpu_ssize_t allocated_memory;
  649. int ret;
  650. _starpu_spin_checklocked(&handle->header_lock);
  651. _starpu_data_allocation_inc_stats(dst_node);
  652. #ifdef STARPU_USE_ALLOCATION_CACHE
  653. /* perhaps we can directly reuse a buffer in the free-list */
  654. uint32_t footprint = _starpu_compute_data_footprint(handle);
  655. _STARPU_TRACE_START_ALLOC_REUSE(dst_node);
  656. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  657. if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
  658. {
  659. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  660. _starpu_allocation_cache_hit(dst_node);
  661. starpu_ssize_t data_size = _starpu_data_get_size(handle);
  662. return data_size;
  663. }
  664. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  665. _STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  666. #endif
  667. do
  668. {
  669. STARPU_ASSERT(handle->ops);
  670. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  671. _STARPU_TRACE_START_ALLOC(dst_node);
  672. STARPU_ASSERT(replicate->data_interface);
  673. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  674. if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
  675. {
  676. /* To facilitate the design of interface, we set the
  677. * proper CUDA device in case it is needed. This avoids
  678. * having to set it again in the malloc method of each
  679. * interface. */
  680. starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
  681. }
  682. #endif
  683. allocated_memory = handle->ops->allocate_data_on_node(replicate->data_interface, dst_node);
  684. _STARPU_TRACE_END_ALLOC(dst_node);
  685. if (allocated_memory == -ENOMEM)
  686. {
  687. size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
  688. size_t handle_size = handle->ops->get_size(handle);
  689. if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
  690. reclaim = starpu_memstrategy_data_size_coefficient*handle_size;
  691. /* Take temporary reference on the replicate */
  692. replicate->refcnt++;
  693. handle->busy_count++;
  694. _starpu_spin_unlock(&handle->header_lock);
  695. _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
  696. if (is_prefetch)
  697. {
  698. flush_memchunk_cache(dst_node, reclaim);
  699. }
  700. else
  701. _starpu_memory_reclaim_generic(dst_node, 0, reclaim);
  702. _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
  703. int cpt = 0;
  704. while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
  705. {
  706. cpt++;
  707. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  708. }
  709. if (cpt == STARPU_SPIN_MAXTRY)
  710. _starpu_spin_lock(&handle->header_lock);
  711. replicate->refcnt--;
  712. STARPU_ASSERT(replicate->refcnt >= 0);
  713. STARPU_ASSERT(handle->busy_count > 0);
  714. handle->busy_count--;
  715. ret = _starpu_data_check_not_busy(handle);
  716. STARPU_ASSERT(ret == 0);
  717. }
  718. }
  719. while((allocated_memory == -ENOMEM) && attempts++ < 2);
  720. return allocated_memory;
  721. }
  722. int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
  723. {
  724. starpu_ssize_t allocated_memory;
  725. unsigned dst_node = replicate->memory_node;
  726. STARPU_ASSERT(handle);
  727. /* A buffer is already allocated on the node */
  728. if (replicate->allocated)
  729. return 0;
  730. STARPU_ASSERT(replicate->data_interface);
  731. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);
  732. /* perhaps we could really not handle that capacity misses */
  733. if (allocated_memory == -ENOMEM)
  734. return -ENOMEM;
  735. register_mem_chunk(replicate, 1);
  736. replicate->allocated = 1;
  737. replicate->automatically_allocated = 1;
  738. if (dst_node == 0)
  739. {
  740. void *ptr = starpu_data_handle_to_pointer(handle, 0);
  741. if (ptr != NULL)
  742. {
  743. _starpu_data_register_ram_pointer(handle, ptr);
  744. }
  745. }
  746. return 0;
  747. }
  748. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
  749. {
  750. return handle->per_node[memory_node].allocated;
  751. }
  752. /* Record that this memchunk has been recently used */
  753. void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
  754. {
  755. _starpu_spin_lock(&lru_rwlock[node]);
  756. struct _starpu_mem_chunk_lru *mc_lru=_starpu_mem_chunk_lru_new();
  757. mc_lru->mc=mc;
  758. _starpu_mem_chunk_lru_list_push_front(starpu_lru_list[node],mc_lru);
  759. _starpu_spin_unlock(&lru_rwlock[node]);
  760. }
  761. /* Push the given memchunk, recently used, at the end of the chunks to be evicted */
  762. /* The mc_rwlock[node] rw-lock should be taken prior to calling this function.*/
  763. static void _starpu_memchunk_recently_used_move(struct _starpu_mem_chunk *mc, unsigned node)
  764. {
  765. /* Note: Sometimes the memchunk is not in the list... */
  766. struct _starpu_mem_chunk *mc_iter;
  767. for (mc_iter = _starpu_mem_chunk_list_begin(mc_list[node]);
  768. mc_iter != _starpu_mem_chunk_list_end(mc_list[node]);
  769. mc_iter = _starpu_mem_chunk_list_next(mc_iter) )
  770. {
  771. if (mc_iter==mc)
  772. {
  773. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  774. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  775. return;
  776. }
  777. }
  778. }
  779. /* Put the recently used memchunks at the end of the mc_list, in the same order
  780. * as the LRU list, so that the most recently used memchunk eventually comes
  781. * last in the mc_list */
  782. static void starpu_lru(unsigned node)
  783. {
  784. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  785. _starpu_spin_lock(&lru_rwlock[node]);
  786. while (!_starpu_mem_chunk_lru_list_empty(starpu_lru_list[node]))
  787. {
  788. struct _starpu_mem_chunk_lru *mc_lru=_starpu_mem_chunk_lru_list_front(starpu_lru_list[node]);
  789. _starpu_memchunk_recently_used_move(mc_lru->mc, node);
  790. _starpu_mem_chunk_lru_list_erase(starpu_lru_list[node], mc_lru);
  791. _starpu_mem_chunk_lru_delete(mc_lru);
  792. }
  793. _starpu_spin_unlock(&lru_rwlock[node]);
  794. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  795. }
  796. #ifdef STARPU_MEMORY_STATS
  797. void _starpu_memory_display_stats_by_node(int node)
  798. {
  799. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  800. if (!_starpu_mem_chunk_list_empty(mc_list[node]))
  801. {
  802. struct _starpu_mem_chunk *mc;
  803. fprintf(stderr, "#-------\n");
  804. fprintf(stderr, "Data on Node #%d\n",node);
  805. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  806. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  807. mc = _starpu_mem_chunk_list_next(mc))
  808. {
  809. if (mc->automatically_allocated == 0)
  810. _starpu_memory_display_handle_stats(mc->data);
  811. }
  812. }
  813. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  814. }
  815. #endif
  816. void starpu_data_display_memory_stats(void)
  817. {
  818. #ifdef STARPU_MEMORY_STATS
  819. unsigned node;
  820. fprintf(stderr, "\n#---------------------\n");
  821. fprintf(stderr, "Memory stats :\n");
  822. for (node = 0; node < STARPU_MAXNODES; node++)
  823. {
  824. _starpu_memory_display_stats_by_node(node);
  825. }
  826. fprintf(stderr, "\n#---------------------\n");
  827. #endif
  828. }
  829. static int
  830. get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
  831. {
  832. int target = -1;
  833. unsigned nnodes = starpu_memory_nodes_get_count();
  834. unsigned int i;
  835. double time_disk = 0;
  836. for (i = 0; i < nnodes; i++)
  837. {
  838. if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
  839. (_starpu_memory_manager_test_allocate_size_(_starpu_data_get_size(handle), i) == 1 ||
  840. handle->per_node[i].allocated))
  841. {
  842. /* if we can write on the disk */
  843. if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
  844. {
  845. /* only time can change between disk <-> main_ram
  846. * and not between main_ram <-> worker if we compare diks*/
  847. double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
  848. if (target == -1 || time_disk > time_tmp)
  849. {
  850. target = i;
  851. time_disk = time_tmp;
  852. }
  853. }
  854. }
  855. }
  856. return target;
  857. }
  858. static unsigned
  859. choose_target(starpu_data_handle_t handle, unsigned node)
  860. {
  861. int target = -1;
  862. size_t size_handle = _starpu_data_get_size(handle);
  863. if (handle->home_node != -1)
  864. /* try to push on RAM if we can before to push on disk */
  865. if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
  866. {
  867. if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  868. _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
  869. {
  870. target = STARPU_MAIN_RAM;
  871. }
  872. else
  873. {
  874. target = get_better_disk_can_accept_size(handle, node);
  875. }
  876. }
  877. /* others memory nodes */
  878. else
  879. {
  880. target = handle->home_node;
  881. }
  882. else
  883. {
  884. /* handle->home_node == -1 */
  885. /* no place for datas in RAM, we push on disk */
  886. if (node == STARPU_MAIN_RAM)
  887. {
  888. target = get_better_disk_can_accept_size(handle, node);
  889. }
  890. /* node != 0 */
  891. /* try to push data to RAM if we can before to push on disk*/
  892. else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  893. _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
  894. {
  895. target = STARPU_MAIN_RAM;
  896. }
  897. /* no place in RAM */
  898. else
  899. {
  900. target = get_better_disk_can_accept_size(handle, node);
  901. }
  902. }
  903. /* we haven't the right to write on the disk */
  904. if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
  905. target = -1;
  906. return target;
  907. }