memalloc.c 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2015 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <datawizard/memory_manager.h>
  18. #include <datawizard/memalloc.h>
  19. #include <datawizard/footprint.h>
  20. #include <core/disk.h>
  21. #include <starpu.h>
  22. #include <common/uthash.h>
  23. /* This per-node RW-locks protect mc_list and memchunk_cache entries */
  24. /* Note: handle header lock is always taken before this */
  25. static struct _starpu_spinlock mc_lock[STARPU_MAXNODES];
  26. /* Potentially in use memory chunks */
  27. static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];
  28. /* Explicitly caches memory chunks that can be reused */
  29. struct mc_cache_entry
  30. {
  31. UT_hash_handle hh;
  32. struct _starpu_mem_chunk_list *list;
  33. uint32_t footprint;
  34. };
  35. static struct mc_cache_entry *mc_cache[STARPU_MAXNODES];
  36. static int mc_cache_nb[STARPU_MAXNODES];
  37. static starpu_ssize_t mc_cache_size[STARPU_MAXNODES];
  38. /* Whether some thread is currently tidying this node */
  39. static unsigned tidying[STARPU_MAXNODES];
  40. /* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
  41. const unsigned starpu_memstrategy_data_size_coefficient=2;
  42. static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
  43. static unsigned choose_target(starpu_data_handle_t handle, unsigned node);
  44. void _starpu_init_mem_chunk_lists(void)
  45. {
  46. unsigned i;
  47. for (i = 0; i < STARPU_MAXNODES; i++)
  48. {
  49. _starpu_spin_init(&mc_lock[i]);
  50. mc_list[i] = _starpu_mem_chunk_list_new();
  51. STARPU_HG_DISABLE_CHECKING(mc_cache_size[i]);
  52. }
  53. }
  54. void _starpu_deinit_mem_chunk_lists(void)
  55. {
  56. unsigned i;
  57. for (i = 0; i < STARPU_MAXNODES; i++)
  58. {
  59. struct mc_cache_entry *entry, *tmp;
  60. _starpu_mem_chunk_list_delete(mc_list[i]);
  61. HASH_ITER(hh, mc_cache[i], entry, tmp)
  62. {
  63. HASH_DEL(mc_cache[i], entry);
  64. _starpu_mem_chunk_list_delete(entry->list);
  65. free(entry);
  66. }
  67. STARPU_ASSERT(mc_cache_nb[i] == 0);
  68. STARPU_ASSERT(mc_cache_size[i] == 0);
  69. _starpu_spin_destroy(&mc_lock[i]);
  70. }
  71. }
  72. /*
  73. * Manipulate subtrees
  74. */
  75. static void unlock_all_subtree(starpu_data_handle_t handle)
  76. {
  77. /* lock all sub-subtrees children
  78. * Note that this is done in the reverse order of the
  79. * lock_all_subtree so that we avoid deadlock */
  80. unsigned i;
  81. for (i =0; i < handle->nchildren; i++)
  82. {
  83. unsigned child = handle->nchildren - 1 - i;
  84. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  85. unlock_all_subtree(child_handle);
  86. }
  87. _starpu_spin_unlock(&handle->header_lock);
  88. }
  89. static int lock_all_subtree(starpu_data_handle_t handle)
  90. {
  91. int child;
  92. /* lock parent */
  93. if (_starpu_spin_trylock(&handle->header_lock))
  94. /* the handle is busy, abort */
  95. return 0;
  96. /* lock all sub-subtrees children */
  97. for (child = 0; child < (int) handle->nchildren; child++)
  98. {
  99. if (!lock_all_subtree(starpu_data_get_child(handle, child))) {
  100. /* Some child is busy, abort */
  101. while (--child >= 0)
  102. /* Unlock what we have already uselessly locked */
  103. unlock_all_subtree(starpu_data_get_child(handle, child));
  104. return 0;
  105. }
  106. }
  107. return 1;
  108. }
  109. static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
  110. {
  111. /* we only free if no one refers to the leaf */
  112. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  113. if (refcnt)
  114. return 0;
  115. if (!handle->nchildren)
  116. return 1;
  117. /* look into all sub-subtrees children */
  118. unsigned child;
  119. for (child = 0; child < handle->nchildren; child++)
  120. {
  121. unsigned res;
  122. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  123. res = may_free_subtree(child_handle, node);
  124. if (!res) return 0;
  125. }
  126. /* no problem was found */
  127. return 1;
  128. }
  129. static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
  130. unsigned dst_node)
  131. {
  132. unsigned i;
  133. unsigned last = 0;
  134. unsigned cnt;
  135. int ret;
  136. STARPU_ASSERT(dst_node != src_node);
  137. if (handle->nchildren == 0)
  138. {
  139. struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
  140. struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
  141. /* this is a leaf */
  142. switch(src_replicate->state)
  143. {
  144. case STARPU_OWNER:
  145. #ifdef STARPU_DEVEL
  146. #warning we should use requests during memory reclaim
  147. #endif
  148. /* TODO use request !! */
  149. /* Take temporary references on the replicates */
  150. _starpu_spin_checklocked(&handle->header_lock);
  151. src_replicate->refcnt++;
  152. dst_replicate->refcnt++;
  153. handle->busy_count+=2;
  154. /* Note: this may release the header lock if
  155. * destination is not allocated yet */
  156. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  157. STARPU_ASSERT(ret == 0);
  158. src_replicate->refcnt--;
  159. dst_replicate->refcnt--;
  160. STARPU_ASSERT(handle->busy_count >= 2);
  161. handle->busy_count -= 2;
  162. ret = _starpu_data_check_not_busy(handle);
  163. STARPU_ASSERT(ret == 0);
  164. dst_replicate->state = STARPU_SHARED;
  165. /* NOTE: now that it's SHARED on dst, FALLTHROUGH to
  166. * update src and perhaps make dst OWNER */
  167. case STARPU_SHARED:
  168. /* some other node may have the copy */
  169. src_replicate->state = STARPU_INVALID;
  170. /* count the number of copies */
  171. cnt = 0;
  172. for (i = 0; i < STARPU_MAXNODES; i++)
  173. {
  174. if (handle->per_node[i].state == STARPU_SHARED)
  175. {
  176. cnt++;
  177. last = i;
  178. }
  179. }
  180. STARPU_ASSERT(cnt > 0);
  181. if (cnt == 1)
  182. handle->per_node[last].state = STARPU_OWNER;
  183. break;
  184. case STARPU_INVALID:
  185. /* nothing to be done */
  186. break;
  187. default:
  188. STARPU_ABORT();
  189. break;
  190. }
  191. }
  192. else
  193. {
  194. /* lock all sub-subtrees children */
  195. unsigned child;
  196. for (child = 0; child < handle->nchildren; child++)
  197. {
  198. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  199. transfer_subtree_to_node(child_handle, src_node, dst_node);
  200. }
  201. }
  202. }
  203. static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
  204. {
  205. unsigned child;
  206. replicate->allocated = 0;
  207. /* XXX why do we need that ? */
  208. replicate->automatically_allocated = 0;
  209. for (child = 0; child < handle->nchildren; child++)
  210. {
  211. /* Notify children that their buffer has been deallocated too */
  212. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  213. notify_handle_children(child_handle, &child_handle->per_node[node], node);
  214. }
  215. }
  216. static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
  217. {
  218. size_t freed = 0;
  219. STARPU_ASSERT(mc->ops);
  220. STARPU_ASSERT(mc->ops->free_data_on_node);
  221. starpu_data_handle_t handle = mc->data;
  222. struct _starpu_data_replicate *replicate = mc->replicate;
  223. if (handle)
  224. _starpu_spin_checklocked(&handle->header_lock);
  225. if (mc->automatically_allocated &&
  226. (!handle || replicate->refcnt == 0))
  227. {
  228. void *data_interface;
  229. if (handle)
  230. STARPU_ASSERT(replicate->allocated);
  231. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  232. if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
  233. {
  234. /* To facilitate the design of interface, we set the
  235. * proper CUDA device in case it is needed. This avoids
  236. * having to set it again in the free method of each
  237. * interface. */
  238. starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
  239. }
  240. #endif
  241. if (handle)
  242. data_interface = replicate->data_interface;
  243. else
  244. data_interface = mc->chunk_interface;
  245. STARPU_ASSERT(data_interface);
  246. _STARPU_TRACE_START_FREE(node, mc->size);
  247. mc->ops->free_data_on_node(data_interface, node);
  248. _STARPU_TRACE_END_FREE(node);
  249. if (handle)
  250. notify_handle_children(handle, replicate, node);
  251. freed = mc->size;
  252. if (handle)
  253. STARPU_ASSERT(replicate->refcnt == 0);
  254. }
  255. return freed;
  256. }
  257. static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  258. {
  259. size_t size;
  260. starpu_data_handle_t handle = mc->data;
  261. if (handle) {
  262. _starpu_spin_checklocked(&handle->header_lock);
  263. mc->size = _starpu_data_get_size(handle);
  264. }
  265. if (mc->replicate)
  266. mc->replicate->mc=NULL;
  267. /* free the actual buffer */
  268. size = free_memory_on_node(mc, node);
  269. /* remove the mem_chunk from the list */
  270. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  271. _starpu_mem_chunk_delete(mc);
  272. return size;
  273. }
  274. /* This function is called for memory chunks that are possibly in used (ie. not
  275. * in the cache). They should therefore still be associated to a handle. */
  276. static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  277. {
  278. size_t freed = 0;
  279. starpu_data_handle_t handle;
  280. handle = mc->data;
  281. STARPU_ASSERT(handle);
  282. /* This data should be written through to this node, avoid dropping it! */
  283. if (handle->wt_mask & (1<<node))
  284. return 0;
  285. /* This data was registered from this node, we will not be able to drop it anyway */
  286. if ((int) node == handle->home_node)
  287. return 0;
  288. /* REDUX memchunk */
  289. if (mc->relaxed_coherency == 2)
  290. {
  291. /* TODO: reduce it back to e.g. main memory */
  292. }
  293. else
  294. /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
  295. * memchunk that could be used with filters. */
  296. if (mc->relaxed_coherency == 1)
  297. {
  298. STARPU_ASSERT(mc->replicate);
  299. if (_starpu_spin_trylock(&handle->header_lock))
  300. /* Handle is busy, abort */
  301. return 0;
  302. if (mc->replicate->refcnt == 0)
  303. {
  304. /* Note that there is no need to transfer any data or
  305. * to update the status in terms of MSI protocol
  306. * because this memchunk is associated to a replicate
  307. * in "relaxed coherency" mode. */
  308. freed = do_free_mem_chunk(mc, node);
  309. }
  310. _starpu_spin_unlock(&handle->header_lock);
  311. }
  312. /* try to lock all the subtree */
  313. else if (lock_all_subtree(handle))
  314. {
  315. /* check if they are all "free" */
  316. if (may_free_subtree(handle, node))
  317. {
  318. int target = -1;
  319. /* XXX Considering only owner to invalidate */
  320. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  321. /* in case there was nobody using that buffer, throw it
  322. * away after writing it back to main memory */
  323. /* choose the best target */
  324. target = choose_target(handle, node);
  325. if (target != -1) {
  326. #ifdef STARPU_MEMORY_STATS
  327. if (handle->per_node[node].state == STARPU_OWNER)
  328. _starpu_memory_handle_stats_invalidated(handle, node);
  329. #endif
  330. _STARPU_TRACE_START_WRITEBACK(node);
  331. transfer_subtree_to_node(handle, node, target);
  332. _STARPU_TRACE_END_WRITEBACK(node);
  333. #ifdef STARPU_MEMORY_STATS
  334. _starpu_memory_handle_stats_loaded_owner(handle, target);
  335. #endif
  336. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  337. /* now the actual buffer may be freed */
  338. freed = do_free_mem_chunk(mc, node);
  339. }
  340. }
  341. /* unlock the tree */
  342. unlock_all_subtree(handle);
  343. }
  344. return freed;
  345. }
  346. #ifdef STARPU_USE_ALLOCATION_CACHE
  347. /* We assume that mc_lock[node] is taken. is_already_in_mc_list indicates
  348. * that the mc is already in the list of buffers that are possibly used, and
  349. * therefore not in the cache. */
  350. static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
  351. {
  352. void *data_interface;
  353. /* we found an appropriate mem chunk: so we get it out
  354. * of the "to free" list, and reassign it to the new
  355. * piece of data */
  356. struct _starpu_data_replicate *old_replicate = mc->replicate;
  357. if (old_replicate)
  358. {
  359. old_replicate->allocated = 0;
  360. old_replicate->automatically_allocated = 0;
  361. old_replicate->initialized = 0;
  362. data_interface = old_replicate->data_interface;
  363. }
  364. else
  365. data_interface = mc->chunk_interface;
  366. STARPU_ASSERT(new_replicate->data_interface);
  367. STARPU_ASSERT(data_interface);
  368. memcpy(new_replicate->data_interface, data_interface, mc->size_interface);
  369. if (!old_replicate)
  370. {
  371. /* Free the copy that we made */
  372. free(mc->chunk_interface);
  373. mc->chunk_interface = NULL;
  374. }
  375. /* XXX: We do not actually reuse the mc at the moment, only the interface */
  376. /* mc->data = new_replicate->handle; */
  377. /* mc->footprint, mc->ops, mc->size_interface, mc->automatically_allocated should be
  378. * unchanged ! */
  379. /* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */
  380. if (is_already_in_mc_list)
  381. {
  382. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  383. }
  384. free(mc);
  385. }
  386. static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
  387. {
  388. unsigned success = 0;
  389. starpu_data_handle_t old_data;
  390. old_data = mc->data;
  391. STARPU_ASSERT(old_data);
  392. /* try to lock all the subtree */
  393. /* and check if they are all "free" */
  394. if (lock_all_subtree(old_data))
  395. {
  396. if (may_free_subtree(old_data, node))
  397. {
  398. success = 1;
  399. /* in case there was nobody using that buffer, throw it
  400. * away after writing it back to main memory */
  401. _STARPU_TRACE_START_WRITEBACK(node);
  402. transfer_subtree_to_node(old_data, node, 0);
  403. _STARPU_TRACE_END_WRITEBACK(node);
  404. /* now replace the previous data */
  405. reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
  406. }
  407. /* unlock the tree */
  408. unlock_all_subtree(old_data);
  409. }
  410. return success;
  411. }
  412. static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
  413. void *data_interface_b, struct starpu_data_interface_ops *ops_b)
  414. {
  415. if (ops_a->interfaceid != ops_b->interfaceid)
  416. return -1;
  417. int ret = ops_a->compare(data_interface_a, data_interface_b);
  418. return ret;
  419. }
  420. /* This function must be called with mc_lock[node] taken */
  421. static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
  422. {
  423. /* go through all buffers in the cache */
  424. struct mc_cache_entry *entry;
  425. HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
  426. if (!entry)
  427. /* No data with that footprint */
  428. return NULL;
  429. struct _starpu_mem_chunk *mc;
  430. for (mc = _starpu_mem_chunk_list_begin(entry->list);
  431. mc != _starpu_mem_chunk_list_end(entry->list);
  432. mc = _starpu_mem_chunk_list_next(mc))
  433. {
  434. /* Is that a false hit ? (this is _very_ unlikely) */
  435. if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) != 1)
  436. continue;
  437. /* Cache hit */
  438. /* Remove from the cache */
  439. _starpu_mem_chunk_list_erase(entry->list, mc);
  440. mc_cache_nb[node]--;
  441. STARPU_ASSERT(mc_cache_nb[node] >= 0);
  442. mc_cache_size[node] -= mc->size;
  443. STARPU_ASSERT(mc_cache_size[node] >= 0);
  444. return mc;
  445. }
  446. /* This is a cache miss */
  447. return NULL;
  448. }
  449. /* this function looks for a memory chunk that matches a given footprint in the
  450. * list of mem chunk that need to be freed. This function must be called with
  451. * mc_lock[node] taken. */
  452. static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
  453. {
  454. struct _starpu_mem_chunk *mc, *next_mc;
  455. /* go through all buffers in the cache */
  456. mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint);
  457. if (mc)
  458. {
  459. /* We found an entry in the cache so we can reuse it */
  460. reuse_mem_chunk(node, replicate, mc, 0);
  461. return 1;
  462. }
  463. if (!_starpu_has_not_important_data)
  464. return 0;
  465. /* now look for some non essential data in the active list */
  466. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  467. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  468. mc = next_mc)
  469. {
  470. /* there is a risk that the memory chunk is freed before next
  471. * iteration starts: so we compute the next element of the list
  472. * now */
  473. next_mc = _starpu_mem_chunk_list_next(mc);
  474. if (mc->data->is_not_important && (mc->footprint == footprint))
  475. {
  476. // fprintf(stderr, "found a candidate ...\n");
  477. if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
  478. return 1;
  479. }
  480. }
  481. return 0;
  482. }
  483. #endif
  484. /*
  485. * Free the memory chuncks that are explicitely tagged to be freed. The
  486. * mc_lock[node] rw-lock should be taken prior to calling this function.
  487. */
  488. static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
  489. {
  490. struct _starpu_mem_chunk *mc;
  491. struct _starpu_mem_chunk_list *busy_mc_cache;
  492. struct mc_cache_entry *entry, *tmp;
  493. size_t freed = 0;
  494. _starpu_spin_lock(&mc_lock[node]);
  495. HASH_ITER(hh, mc_cache[node], entry, tmp)
  496. {
  497. busy_mc_cache = _starpu_mem_chunk_list_new();
  498. while (!_starpu_mem_chunk_list_empty(entry->list)) {
  499. mc = _starpu_mem_chunk_list_pop_front(entry->list);
  500. starpu_data_handle_t handle = mc->data;
  501. if (handle)
  502. if (_starpu_spin_trylock(&handle->header_lock)) {
  503. /* The handle is still busy, leave this chunk for later */
  504. _starpu_mem_chunk_list_push_back(busy_mc_cache, mc);
  505. continue;
  506. }
  507. mc_cache_nb[node]--;
  508. STARPU_ASSERT(mc_cache_nb[node] >= 0);
  509. mc_cache_size[node] -= mc->size;
  510. STARPU_ASSERT(mc_cache_size[node] >= 0);
  511. freed += free_memory_on_node(mc, node);
  512. if (handle)
  513. _starpu_spin_unlock(&handle->header_lock);
  514. free(mc->chunk_interface);
  515. _starpu_mem_chunk_delete(mc);
  516. if (reclaim && freed >= reclaim)
  517. break;
  518. }
  519. _starpu_mem_chunk_list_push_list_front(busy_mc_cache, entry->list);
  520. _starpu_mem_chunk_list_delete(busy_mc_cache);
  521. if (reclaim && freed >= reclaim)
  522. break;
  523. }
  524. _starpu_spin_unlock(&mc_lock[node]);
  525. return freed;
  526. }
  527. /*
  528. * Try to free the buffers currently in use on the memory node. If the force
  529. * flag is set, the memory is freed regardless of coherency concerns (this
  530. * should only be used at the termination of StarPU for instance). The
  531. * mc_lock[node] should be taken prior to calling this function.
  532. */
  533. static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
  534. {
  535. size_t freed = 0;
  536. struct _starpu_mem_chunk *mc, *next_mc;
  537. /*
  538. * We have to unlock mc_lock before locking header_lock, so we have
  539. * to be careful with the list. We try to do just one pass, by
  540. * remembering the next mc to be tried. If it gets dropped, we restart
  541. * from zero. So we continue until we go through the whole list without
  542. * finding anything to free.
  543. */
  544. restart:
  545. _starpu_spin_lock(&mc_lock[node]);
  546. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  547. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  548. mc = next_mc)
  549. {
  550. /* mc hopefully gets out of the list, we thus need to prefetch
  551. * the next element */
  552. next_mc = _starpu_mem_chunk_list_next(mc);
  553. if (!force)
  554. {
  555. freed += try_to_free_mem_chunk(mc, node);
  556. if (reclaim && freed >= reclaim)
  557. break;
  558. }
  559. else
  560. {
  561. starpu_data_handle_t handle = mc->data;
  562. if (_starpu_spin_trylock(&handle->header_lock))
  563. {
  564. /* Ergl. We are shutting down, but somebody is
  565. * still locking the handle. That's not
  566. * supposed to happen, but better be safe by
  567. * letting it go through. */
  568. _starpu_spin_unlock(&mc_lock[node]);
  569. goto restart;
  570. }
  571. /* We must free the memory now, because we are
  572. * terminating the drivers: note that data coherency is
  573. * not maintained in that case ! */
  574. freed += do_free_mem_chunk(mc, node);
  575. _starpu_spin_unlock(&handle->header_lock);
  576. }
  577. }
  578. _starpu_spin_unlock(&mc_lock[node]);
  579. return freed;
  580. }
  581. size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
  582. {
  583. size_t freed = 0;
  584. if (reclaim && !force)
  585. {
  586. static unsigned warned;
  587. if (!warned) {
  588. if (STARPU_ATOMIC_ADD(&warned, 1) == 1)
  589. {
  590. char name[32];
  591. _starpu_memory_node_get_name(node, name, sizeof(name));
  592. _STARPU_DISP("Not enough memory left on node %s. Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges\n", name, (unsigned long) (reclaim / 1048576));
  593. }
  594. }
  595. }
  596. /* remove all buffers for which there was a removal request */
  597. freed += flush_memchunk_cache(node, reclaim);
  598. /* try to free all allocated data potentially in use */
  599. if (reclaim && freed<reclaim)
  600. freed += free_potentially_in_use_mc(node, force, reclaim);
  601. return freed;
  602. }
  603. /*
  604. * This function frees all the memory that was implicitely allocated by StarPU
  605. * (for the data replicates). This is not ensuring data coherency, and should
  606. * only be called while StarPU is getting shut down.
  607. */
  608. size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
  609. {
  610. return _starpu_memory_reclaim_generic(node, 1, 0);
  611. }
  612. /* Periodic tidy of available memory */
  613. void starpu_memchunk_tidy(unsigned node)
  614. {
  615. starpu_ssize_t total = starpu_memory_get_total(node);
  616. starpu_ssize_t available = starpu_memory_get_available(node);
  617. size_t target, amount;
  618. unsigned minimum_p = starpu_get_env_number_default("STARPU_MINIMUM_AVAILABLE_MEM", 5);
  619. unsigned target_p = starpu_get_env_number_default("STARPU_TARGET_AVAILABLE_MEM", 10);
  620. if (total <= 0)
  621. return;
  622. /* Count cached allocation as being available */
  623. available += mc_cache_size[node];
  624. if (available >= (total * minimum_p) / 100)
  625. /* Enough available space, do not trigger reclaiming */
  626. return;
  627. /* Not enough available space, reclaim until we reach the target. */
  628. target = (total * target_p) / 100;
  629. amount = target - available;
  630. if (tidying[node])
  631. /* Some thread is already tidying this node, let it do it */
  632. return;
  633. if (STARPU_ATOMIC_ADD(&tidying[node], 1) > 1)
  634. /* Some thread got it before us, let it do it */
  635. goto out;
  636. static unsigned warned;
  637. if (!warned) {
  638. if (STARPU_ATOMIC_ADD(&warned, 1) == 1)
  639. {
  640. char name[32];
  641. _starpu_memory_node_get_name(node, name, sizeof(name));
  642. _STARPU_DISP("Low memory left on node %s (%luMiB over %luMiB). Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. The thresholds can be tuned using the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment variables.\n", name, (unsigned long) (available / 1048576), (unsigned long) (total / 1048576), (unsigned long) (amount / 1048576));
  643. }
  644. }
  645. /* TODO: only request writebacks to get buffers clean, without waiting
  646. * for it */
  647. free_potentially_in_use_mc(node, 0, amount);
  648. out:
  649. (void) STARPU_ATOMIC_ADD(&tidying[node], -1);
  650. }
  651. static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
  652. {
  653. struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
  654. starpu_data_handle_t handle = replicate->handle;
  655. STARPU_ASSERT(handle);
  656. STARPU_ASSERT(handle->ops);
  657. mc->data = handle;
  658. mc->footprint = _starpu_compute_data_footprint(handle);
  659. mc->ops = handle->ops;
  660. mc->automatically_allocated = automatically_allocated;
  661. mc->relaxed_coherency = replicate->relaxed_coherency;
  662. mc->replicate = replicate;
  663. mc->replicate->mc = mc;
  664. mc->chunk_interface = NULL;
  665. mc->size_interface = interface_size;
  666. return mc;
  667. }
  668. static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
  669. {
  670. unsigned dst_node = replicate->memory_node;
  671. struct _starpu_mem_chunk *mc;
  672. /* the interface was already filled by ops->allocate_data_on_node */
  673. size_t interface_size = replicate->handle->ops->interface_size;
  674. /* Put this memchunk in the list of memchunk in use */
  675. mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);
  676. _starpu_spin_lock(&mc_lock[dst_node]);
  677. _starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
  678. _starpu_spin_unlock(&mc_lock[dst_node]);
  679. }
  680. /* This function is called when the handle is destroyed (eg. when calling
  681. * unregister or unpartition). It puts all the memchunks that refer to the
  682. * specified handle into the cache.
  683. */
  684. void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
  685. {
  686. struct _starpu_mem_chunk *mc = replicate->mc;
  687. STARPU_ASSERT(mc->data == handle);
  688. /* Record the allocated size, so that later in memory
  689. * reclaiming we can estimate how much memory we free
  690. * by freeing this. */
  691. mc->size = size;
  692. /* Also keep the interface parameters and pointers, for later reuse
  693. * while detached, or freed */
  694. mc->chunk_interface = malloc(mc->size_interface);
  695. memcpy(mc->chunk_interface, replicate->data_interface, mc->size_interface);
  696. /* This memchunk doesn't have to do with the data any more. */
  697. replicate->mc = NULL;
  698. mc->replicate = NULL;
  699. replicate->allocated = 0;
  700. replicate->automatically_allocated = 0;
  701. replicate->initialized = 0;
  702. _starpu_spin_lock(&mc_lock[node]);
  703. mc->data = NULL;
  704. /* remove it from the main list */
  705. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  706. _starpu_spin_unlock(&mc_lock[node]);
  707. /*
  708. * Unless the user has provided a main RAM limitation, we would fill
  709. * memory with cached data and then eventually swap.
  710. */
  711. /*
  712. * This is particularly important when
  713. * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
  714. * wouldn't even re-use these allocations!
  715. */
  716. if (handle->ops->dontcache || (starpu_node_get_kind(node) == STARPU_CPU_RAM && starpu_get_env_number("STARPU_LIMIT_CPU_MEM") < 0))
  717. {
  718. /* Free data immediately */
  719. free_memory_on_node(mc, node);
  720. free(mc->chunk_interface);
  721. _starpu_mem_chunk_delete(mc);
  722. }
  723. else
  724. {
  725. /* put it in the list of buffers to be removed */
  726. uint32_t footprint = mc->footprint;
  727. struct mc_cache_entry *entry;
  728. _starpu_spin_lock(&mc_lock[node]);
  729. HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
  730. if (!entry) {
  731. entry = malloc(sizeof(*entry));
  732. entry->list = _starpu_mem_chunk_list_new();
  733. entry->footprint = footprint;
  734. HASH_ADD(hh, mc_cache[node], footprint, sizeof(entry->footprint), entry);
  735. }
  736. mc_cache_nb[node]++;
  737. mc_cache_size[node] += mc->size;
  738. _starpu_mem_chunk_list_push_front(entry->list, mc);
  739. _starpu_spin_unlock(&mc_lock[node]);
  740. }
  741. }
  742. /*
  743. * In order to allocate a piece of data, we try to reuse existing buffers if
  744. * its possible.
  745. * 1 - we try to reuse a memchunk that is explicitely unused.
  746. * 2 - we go through the list of memory chunks and find one that is not
  747. * referenced and that has the same footprint to reuse it.
  748. * 3 - we call the usual driver's alloc method
  749. * 4 - we go through the list of memory chunks and release those that are
  750. * not referenced (or part of those).
  751. *
  752. */
  753. static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
  754. {
  755. unsigned attempts = 0;
  756. starpu_ssize_t allocated_memory;
  757. int ret;
  758. starpu_ssize_t data_size = _starpu_data_get_size(handle);
  759. _starpu_spin_checklocked(&handle->header_lock);
  760. _starpu_data_allocation_inc_stats(dst_node);
  761. #ifdef STARPU_USE_ALLOCATION_CACHE
  762. /* perhaps we can directly reuse a buffer in the free-list */
  763. uint32_t footprint = _starpu_compute_data_footprint(handle);
  764. _STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size);
  765. _starpu_spin_lock(&mc_lock[dst_node]);
  766. if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
  767. {
  768. _starpu_spin_unlock(&mc_lock[dst_node]);
  769. _starpu_allocation_cache_hit(dst_node);
  770. return data_size;
  771. }
  772. _starpu_spin_unlock(&mc_lock[dst_node]);
  773. _STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  774. #endif
  775. STARPU_ASSERT(handle->ops);
  776. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  777. STARPU_ASSERT(replicate->data_interface);
  778. char data_interface[handle->ops->interface_size];
  779. memcpy(data_interface, replicate->data_interface, handle->ops->interface_size);
  780. /* Take temporary reference on the replicate */
  781. replicate->refcnt++;
  782. handle->busy_count++;
  783. _starpu_spin_unlock(&handle->header_lock);
  784. do
  785. {
  786. _STARPU_TRACE_START_ALLOC(dst_node, data_size);
  787. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  788. if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
  789. {
  790. /* To facilitate the design of interface, we set the
  791. * proper CUDA device in case it is needed. This avoids
  792. * having to set it again in the malloc method of each
  793. * interface. */
  794. starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
  795. }
  796. #endif
  797. allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node);
  798. _STARPU_TRACE_END_ALLOC(dst_node);
  799. if (allocated_memory == -ENOMEM)
  800. {
  801. size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
  802. size_t handle_size = handle->ops->get_size(handle);
  803. if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
  804. reclaim = starpu_memstrategy_data_size_coefficient*handle_size;
  805. _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
  806. if (is_prefetch)
  807. {
  808. flush_memchunk_cache(dst_node, reclaim);
  809. }
  810. else
  811. _starpu_memory_reclaim_generic(dst_node, 0, reclaim);
  812. _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
  813. }
  814. }
  815. while((allocated_memory == -ENOMEM) && attempts++ < 2);
  816. int cpt = 0;
  817. while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
  818. {
  819. cpt++;
  820. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  821. }
  822. if (cpt == STARPU_SPIN_MAXTRY)
  823. _starpu_spin_lock(&handle->header_lock);
  824. replicate->refcnt--;
  825. STARPU_ASSERT(replicate->refcnt >= 0);
  826. STARPU_ASSERT(handle->busy_count > 0);
  827. handle->busy_count--;
  828. ret = _starpu_data_check_not_busy(handle);
  829. STARPU_ASSERT(ret == 0);
  830. if (replicate->allocated)
  831. {
  832. /* Argl, somebody allocated it in between already, drop this one */
  833. _STARPU_TRACE_START_FREE(dst_node, data_size);
  834. handle->ops->free_data_on_node(data_interface, dst_node);
  835. _STARPU_TRACE_END_FREE(dst_node);
  836. allocated_memory = 0;
  837. }
  838. else
  839. /* Install allocated interface */
  840. memcpy(replicate->data_interface, data_interface, handle->ops->interface_size);
  841. return allocated_memory;
  842. }
  843. int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
  844. {
  845. starpu_ssize_t allocated_memory;
  846. unsigned dst_node = replicate->memory_node;
  847. STARPU_ASSERT(handle);
  848. /* A buffer is already allocated on the node */
  849. if (replicate->allocated)
  850. return 0;
  851. STARPU_ASSERT(replicate->data_interface);
  852. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);
  853. /* perhaps we could really not handle that capacity misses */
  854. if (allocated_memory == -ENOMEM)
  855. return -ENOMEM;
  856. register_mem_chunk(replicate, 1);
  857. replicate->allocated = 1;
  858. replicate->automatically_allocated = 1;
  859. if (replicate->relaxed_coherency == 0 && dst_node == STARPU_MAIN_RAM)
  860. {
  861. /* We are allocating the buffer in main memory, also register it
  862. * for the gcc plugin. */
  863. void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
  864. if (ptr != NULL)
  865. {
  866. _starpu_data_register_ram_pointer(handle, ptr);
  867. }
  868. }
  869. return 0;
  870. }
  871. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
  872. {
  873. return handle->per_node[memory_node].allocated;
  874. }
  875. /* This memchunk has been recently used, put it last on the mc_list, so we will
  876. * try to evict it as late as possible */
  877. void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
  878. {
  879. if (!mc)
  880. /* user-allocated memory */
  881. return;
  882. _starpu_spin_lock(&mc_lock[node]);
  883. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  884. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  885. _starpu_spin_unlock(&mc_lock[node]);
  886. }
  887. #ifdef STARPU_MEMORY_STATS
  888. void _starpu_memory_display_stats_by_node(int node)
  889. {
  890. _starpu_spin_lock(&mc_lock[node]);
  891. if (!_starpu_mem_chunk_list_empty(mc_list[node]))
  892. {
  893. struct _starpu_mem_chunk *mc;
  894. fprintf(stderr, "#-------\n");
  895. fprintf(stderr, "Data on Node #%d\n",node);
  896. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  897. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  898. mc = _starpu_mem_chunk_list_next(mc))
  899. {
  900. if (mc->automatically_allocated == 0)
  901. _starpu_memory_display_handle_stats(mc->data);
  902. }
  903. }
  904. _starpu_spin_unlock(&mc_lock[node]);
  905. }
  906. #endif
  907. void starpu_data_display_memory_stats(void)
  908. {
  909. #ifdef STARPU_MEMORY_STATS
  910. unsigned node;
  911. fprintf(stderr, "\n#---------------------\n");
  912. fprintf(stderr, "Memory stats :\n");
  913. for (node = 0; node < STARPU_MAXNODES; node++)
  914. {
  915. _starpu_memory_display_stats_by_node(node);
  916. }
  917. fprintf(stderr, "\n#---------------------\n");
  918. #endif
  919. }
  920. static int
  921. get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
  922. {
  923. int target = -1;
  924. unsigned nnodes = starpu_memory_nodes_get_count();
  925. unsigned int i;
  926. double time_disk = 0;
  927. for (i = 0; i < nnodes; i++)
  928. {
  929. if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
  930. (_starpu_memory_manager_test_allocate_size(i, _starpu_data_get_size(handle)) == 1 ||
  931. handle->per_node[i].allocated))
  932. {
  933. /* if we can write on the disk */
  934. if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
  935. {
  936. /* only time can change between disk <-> main_ram
  937. * and not between main_ram <-> worker if we compare diks*/
  938. double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
  939. if (target == -1 || time_disk > time_tmp)
  940. {
  941. target = i;
  942. time_disk = time_tmp;
  943. }
  944. }
  945. }
  946. }
  947. return target;
  948. }
  949. static unsigned
  950. choose_target(starpu_data_handle_t handle, unsigned node)
  951. {
  952. int target = -1;
  953. size_t size_handle = _starpu_data_get_size(handle);
  954. if (handle->home_node != -1)
  955. /* try to push on RAM if we can before to push on disk */
  956. if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
  957. {
  958. if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  959. _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
  960. {
  961. target = STARPU_MAIN_RAM;
  962. }
  963. else
  964. {
  965. target = get_better_disk_can_accept_size(handle, node);
  966. }
  967. }
  968. /* others memory nodes */
  969. else
  970. {
  971. target = handle->home_node;
  972. }
  973. else
  974. {
  975. /* handle->home_node == -1 */
  976. /* no place for datas in RAM, we push on disk */
  977. if (node == STARPU_MAIN_RAM)
  978. {
  979. target = get_better_disk_can_accept_size(handle, node);
  980. }
  981. /* node != 0 */
  982. /* try to push data to RAM if we can before to push on disk*/
  983. else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  984. _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
  985. {
  986. target = STARPU_MAIN_RAM;
  987. }
  988. /* no place in RAM */
  989. else
  990. {
  991. target = get_better_disk_can_accept_size(handle, node);
  992. }
  993. }
  994. /* we haven't the right to write on the disk */
  995. if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
  996. target = -1;
  997. return target;
  998. }